###########################
###########################
### Replication data for ##
### Weeks 2022 ############
### Making Gender #########
### Salient: From #########
### Gender Quota Laws to ##
### Policy ################
###########################
###########################

### Analyses carried out in R version 4.1.2 (2021-11-01) -- base R

###########################
###########################
### Chapter 1 #############
###########################
###########################

### Synthetic control for Belgium 
### For Figure 1.1, Figure A1.1 (left panel)

mydata = read.csv("Belg.csv")
head(mydata)
mydata$belgsynth<-mydata$X.1

is.character(mydata$countryname)
mydata$countryname2<-as.factor(mydata$countryname)
mydata$countryn<-as.numeric(mydata$countryname2)

## drop rows outside election years 
mydata2 <- mydata[ which(mydata$belgsynth!='NA'), ] 
nrow(mydata2)
nrow(mydata)
mydata<-mydata2
mydata$countryname2<-as.character(mydata$countryname)

## drop Ireland (quota in 2016) and other unbalanced data countries
nrow(mydata)
summary(mydata)
mydata$X<-NULL
mydata$X.1<-NULL 

mydata2 <- mydata[ which(mydata$countryname!='Ireland'), ] 
nrow(mydata2)

mydata<-mydata2

##########################
######## Synthetic controls

library("Synth")

dataprep.out <-
              dataprep(foo = mydata,
                       predictors = c("gdp_d100", "share_partyquota","flfp") ,
                       predictors.op = "mean" ,
                       time.predictors.prior = 2:5 ,
                       special.predictors = list(
                         list("womenpar" , 2 , "mean"),
                         list("womenpar" , 3 , "mean"),
                         list("womenpar" , 4 , "mean"),
                         list("womenpar" , 5 , "mean")
                                                ),
                         dependent = "womenpar",
                       unit.variable = "countryn",
                       unit.names.variable = "countryname2",
                       time.variable = "belgsynth",
                       treatment.identifier = 3,
                       controls.identifier = c(1,2,4:8, 10:11, 13:16, 18),
                       time.optimize.ssr = 2:5,
                       time.plot = 1:9
                       )

 synth.out <- synth(data.prep.obj = dataprep.out,
                    method = "BFGS")

 synth.tables <- synth.tab(dataprep.res = dataprep.out,
                           synth.res = synth.out
                           )

### Table of sample mean vs treated and synthetic 
 synth.tables$tab.pred

### Table of weighted units
 synth.tables$tab.w


library(ggplot2)

### Figure 1.1 

#### PDF 

pdf("Fig1_1.pdf", width = 5, height = 5)
 path.plot(synth.res = synth.out,
           dataprep.res = dataprep.out,tr.intake=5,
           Ylab = "Percentage of women in national parliament",
           Xlab = "Elections",
           Ylim = c(0,50),
           Main = "Belgium",
           Legend = c("Belgium","synthetic Belgium"),
           Legend.position = "topleft"
           )
dev.off()


##actual trend
dataprep.out$Y1plot 

##synthetic trend
dataprep.out$Y0plot%*%synth.out$solution.w  

##difference (effect)
gaps<- dataprep.out$Y1plot-(
dataprep.out$Y0plot%*%synth.out$solution.w
) ; gaps


## Figure A1.1

mydata$unit<-NULL
mydata$unit[mydata$countryname=="Norway"]<-1
mydata$unit[mydata$countryname=="Australia"]<-2
mydata$unit[mydata$countryname=="Austria"]<-3
mydata$unit[mydata$countryname=="Canada"]<-4
mydata$unit[mydata$countryname=="Denmark"]<-5
mydata$unit[mydata$countryname=="Finland"]<-6
mydata$unit[mydata$countryname=="Germany"]<-7
mydata$unit[mydata$countryname=="Greece"]<-8
mydata$unit[mydata$countryname=="Italy"]<-9
mydata$unit[mydata$countryname=="Japan"]<-10
mydata$unit[mydata$countryname=="Netherlands"]<-11
mydata$unit[mydata$countryname=="New Zealand"]<-12
mydata$unit[mydata$countryname=="United Kingdom"]<-13
mydata$unit[mydata$countryname=="Sweden"]<-14
mydata$unit[mydata$countryname=="Belgium"]<-15

newdata <- mydata[order(mydata$unit, mydata$belgsynth),]
ddata<-newdata

store <- matrix(NA,length(1:9),14) #dataframe with 9 rows for periods / elections
colnames(store) <- unique(ddata$countryname)[-1]
#columns are countries 

# run placebo test
for(iter in 2:15)
 {
 dataprep.out <-
              dataprep(foo = ddata,
                       predictors = c("gdp_d100", "share_partyquota","flfp") ,
                       predictors.op = "mean" ,
                       time.predictors.prior = 2:5 ,
                       special.predictors = list(
                         list("womenpar" , 2 , "mean"),
                         list("womenpar" , 3 , "mean"),
                         list("womenpar" , 4 , "mean"),
                         list("womenpar" , 5 , "mean")
                                                ),

                         dependent = "womenpar",
                       unit.variable = "unit",
                       unit.names.variable = "countryname2",
                       time.variable = "belgsynth",
                       treatment.identifier = iter,
                       controls.identifier = c(2:15)[-iter+1],
                       time.optimize.ssr = 2:5,
                       time.plot = 1:9
                       )



# run synth
synth.out <- synth(
                   data.prep.obj = dataprep.out,
                   method = "BFGS"
                   )

# store gaps
store[,iter-1] <- dataprep.out$Y1plot - (dataprep.out$Y0plot %*% synth.out$solution.w)
}

# now do figure
data <- store
rownames(data) <- 1:9

# Set bounds in gaps data
gap.start     <- 1
gap.end       <- nrow(data)
years         <- 1:9
gap.end.pre  <- which(rownames(data)=="5")

#  MSPE Pre-Treatment
mse        <-             apply(data[ gap.start:gap.end.pre,]^2,2,mean)
basque.mse <- as.numeric(mse[14])
# Exclude states with 5 times higher MSPE than basque
data <- data[,mse<10000000*basque.mse] #gives 11 countries, 9% probability
Cex.set <- .75

# Plot
plot(years,data[gap.start:gap.end,which(colnames(data)=="Belgium")],
     ylim=c(-50,50),xlab="Elections",
     xlim=c(1,9),ylab="Change in percentage of women in parliament",
main="Belgium",
     type="l",lwd=2,col="black",
     xaxs="i",yaxs="i")

# Add lines for control states
for (i in 1:ncol(data)) { lines(years,data[gap.start:gap.end,i],col="gray") }

## Add line
lines(years,data[gap.start:gap.end,which(colnames(data)=="Belgium")],lwd=2,col="black")

# Add grid
abline(v=5,lty="dotted",lwd=2)
legend("bottomright",legend=c("Belgium","control countries"),
lty=c(1,1),col=c("black","gray"),lwd=c(2,1),cex=.8)
arrows(4,-10,5,-10,col="black",length=.1)
text(3.5,-10,"Quota Law",cex=Cex.set)

tiff("FigA1_1a.tiff", width = 5, height = 5, units = 'in', res = 1200)
plot(years,data[gap.start:gap.end,which(colnames(data)=="Belgium")],
     ylim=c(-50,50),xlab="Elections",
     xlim=c(1,9),ylab="Change in percentage of women in parliament",
main="Belgium",
     type="l",lwd=2,col="black",
     xaxs="i",yaxs="i")

# Add lines for control states
for (i in 1:ncol(data)) { lines(years,data[gap.start:gap.end,i],col="gray") }

## Add line
lines(years,data[gap.start:gap.end,which(colnames(data)=="Belgium")],lwd=2,col="black")

# Add grid
abline(v=5,lty="dotted",lwd=2)
legend("bottomright",legend=c("Belgium","control countries"),
lty=c(1,1),col=c("black","gray"),lwd=c(2,1),cex=.8)
arrows(4,-10,5,-10,col="black",length=.1)
text(3.5,-10,"Quota Law",cex=Cex.set)
dev.off()

####################### PDF 


pdf("FigA1_1a.pdf", width = 5, height = 5)
plot(years,data[gap.start:gap.end,which(colnames(data)=="Belgium")],
     ylim=c(-50,50),xlab="Elections",
     xlim=c(1,9),ylab="Change in percentage of women in parliament",
main="Belgium",
     type="l",lwd=2,col="black",
     xaxs="i",yaxs="i")

# Add lines for control states
for (i in 1:ncol(data)) { lines(years,data[gap.start:gap.end,i],col="gray") }

## Add line
lines(years,data[gap.start:gap.end,which(colnames(data)=="Belgium")],lwd=2,col="black")

# Add grid
abline(v=5,lty="dotted",lwd=2)
legend("bottomright",legend=c("Belgium","control countries"),
lty=c(1,1),col=c("black","gray"),lwd=c(2,1),cex=.8)
arrows(4,-10,5,-10,col="black",length=.1)
text(3.5,-10,"Quota Law",cex=Cex.set)
dev.off()

#########################################
############## Synethic control for Portugal 
## Figure 1.2, A1.1 (right panel)

mydata = read.csv("Port.csv")
head(mydata)

is.character(mydata$countryname)
mydata$countryname2<-as.factor(mydata$countryname)
mydata$countryn<-as.numeric(mydata$countryname2)

## drop rows outside election years 
mydata2 <- mydata[ which(mydata$portsynth!='NA'), ] 
nrow(mydata2)
nrow(mydata)
mydata<-mydata2
mydata$countryname2<-as.character(mydata$countryname)

## drop Ireland (quota in 2016) and Italy (quota implemented 2018
nrow(mydata)
summary(mydata)
mydata$X<-NULL

mydata2 <- mydata[ which(mydata$countryname!='Ireland'), ] 
nrow(mydata2)

mydata<-mydata2

mydata2 <- mydata[ which(mydata$countryname!='Italy'), ] 
nrow(mydata2)

mydata<-mydata2

## left with 14 countries 
table(mydata$countryname, mydata$countryn)

##########################
######## Synthetic controls

library("Synth")

dataprep.out <-
              dataprep(foo = mydata,
                       predictors = c("gdp_d100", "share_partyquota","flfp") ,
                       predictors.op = "mean" ,
                       time.predictors.prior = 2:5 ,
                       special.predictors = list(
                         list("womenpar" , 2 , "mean"),
                         list("womenpar" , 3 , "mean"),
                         list("womenpar" , 4 , "mean"),
                         list("womenpar" , 5 , "mean")
                                                ),
                         dependent = "womenpar",
                       unit.variable = "countryn",
                       unit.names.variable = "countryname2",
                       time.variable = "portsynth",
                       treatment.identifier = 15,
                       controls.identifier = c(1:7,10,12:14,16, 18),
                       time.optimize.ssr = 2:5,
                       time.plot = 1:7
                       )

 synth.out <- synth(data.prep.obj = dataprep.out,
                    method = "BFGS")

 synth.tables <- synth.tab(dataprep.res = dataprep.out,
                           synth.res = synth.out
                           )
### Table of treated and synthetic vales compared to sample mean

 synth.tables$tab.pred


### Table of unit weights 

 synth.tables$tab.w

## Figure 1.2

tiff("Fig1_2.tiff", width = 5, height = 5, units = 'in', res = 1200)

 path.plot(synth.res = synth.out,
           dataprep.res = dataprep.out,tr.intake=5,
           Ylab = "Percentage of women in national parliament",
           Xlab = "Elections",
           Ylim = c(0,50),
           Main = "Portugal",
           Legend = c("Portugal","synthetic Portugal"),
           Legend.position = "topleft"
           )
dev.off()

################### PDF 

pdf("Fig1_2.pdf", width = 5, height = 5)

 path.plot(synth.res = synth.out,
           dataprep.res = dataprep.out,tr.intake=5,
           Ylab = "Percentage of women in national parliament",
           Xlab = "Elections",
           Ylim = c(0,50),
           Main = "Portugal",
           Legend = c("Portugal","synthetic Portugal"),
           Legend.position = "topleft"
           )
dev.off()


##actual trend
dataprep.out$Y1plot 


##synthetic trend
dataprep.out$Y0plot%*%synth.out$solution.w  


##difference (effect)
gaps<- dataprep.out$Y1plot-(
dataprep.out$Y0plot%*%synth.out$solution.w
) ; gaps

## Figure A.1 (right panel)

mydata$unit<-NULL
mydata$unit[mydata$countryname=="Norway"]<-1
mydata$unit[mydata$countryname=="Australia"]<-2
mydata$unit[mydata$countryname=="Austria"]<-3
mydata$unit[mydata$countryname=="Canada"]<-4
mydata$unit[mydata$countryname=="Denmark"]<-5
mydata$unit[mydata$countryname=="Finland"]<-6
mydata$unit[mydata$countryname=="Germany"]<-7
mydata$unit[mydata$countryname=="Greece"]<-8
mydata$unit[mydata$countryname=="Japan"]<-9
mydata$unit[mydata$countryname=="Netherlands"]<-10
mydata$unit[mydata$countryname=="New Zealand"]<-11
mydata$unit[mydata$countryname=="Sweden"]<-12
mydata$unit[mydata$countryname=="United Kingdom"]<-13
mydata$unit[mydata$countryname=="Portugal"]<-14
table(mydata$unit)

newdata <- mydata[order(mydata$unit, mydata$portsynth),]
ddata<-newdata

store <- matrix(NA,length(1:7),13) #dataframe with 9 rows for periods / elections
colnames(store) <- unique(ddata$countryname)[-1]
#columns are countries 

# run placebo test
for(iter in 2:14)
 {
 dataprep.out <-
              dataprep(foo = ddata,
                       predictors = c("gdp_d100", "share_partyquota","flfp") ,
                       predictors.op = "mean" ,
                       time.predictors.prior = 2:5 ,
                       special.predictors = list(
                         list("womenpar" , 2 , "mean"),
                         list("womenpar" , 3 , "mean"),
                         list("womenpar" , 4 , "mean"),
                         list("womenpar" , 5 , "mean")
                                                ),

                         dependent = "womenpar",
                       unit.variable = "unit",
                       unit.names.variable = "countryname2",
                       time.variable = "portsynth",
                       treatment.identifier = iter,
                       controls.identifier = c(2:14)[-iter+1],
                       time.optimize.ssr = 2:5,
                       time.plot = 1:7
                       )



# run synth
synth.out <- synth(
                   data.prep.obj = dataprep.out,
                   method = "BFGS"
                   )

# store gaps
store[,iter-1] <- dataprep.out$Y1plot - (dataprep.out$Y0plot %*% synth.out$solution.w)
}

# now do figure
data <- store
rownames(data) <- 1:7

# Set bounds in gaps data
gap.start     <- 1
gap.end       <- nrow(data)
years         <- 1:7
gap.end.pre  <- which(rownames(data)=="5")

#  MSPE Pre-Treatment
mse        <-             apply(data[ gap.start:gap.end.pre,]^2,2,mean)
basque.mse <- as.numeric(mse[13])
# Exclude states with 5 times higher MSPE than basque
data <- data[,mse<10000000*basque.mse] #gives 11 countries, 9% probability
Cex.set <- .75

# Plot

tiff("FigA1_1b.tiff", width = 5, height = 5, units = 'in', res = 1200)

plot(years,data[gap.start:gap.end,which(colnames(data)=="Portugal")],
     ylim=c(-50,50),xlab="Elections",
     xlim=c(1,7),ylab="Change in percentage of women in parliament",
main="Portugal",
     type="l",lwd=2,col="black",
     xaxs="i",yaxs="i")

# Add lines for control states
for (i in 1:ncol(data)) { lines(years,data[gap.start:gap.end,i],col="gray") }

## Add line
lines(years,data[gap.start:gap.end,which(colnames(data)=="Portugal")],lwd=2,col="black")

# Add grid
abline(v=5,lty="dotted",lwd=2)
legend("bottomright",legend=c("Portugal","control countries"),
lty=c(1,1),col=c("black","gray"),lwd=c(2,1),cex=.8)
arrows(4,-10,5,-10,col="black",length=.1)
text(3.5,-10,"Quota Law",cex=Cex.set)

dev.off()

######################### PDF


pdf("FigA1_1b.pdf", width = 5, height = 5)

plot(years,data[gap.start:gap.end,which(colnames(data)=="Portugal")],
     ylim=c(-50,50),xlab="Elections",
     xlim=c(1,7),ylab="Change in percentage of women in parliament",
main="Portugal",
     type="l",lwd=2,col="black",
     xaxs="i",yaxs="i")

# Add lines for control states
for (i in 1:ncol(data)) { lines(years,data[gap.start:gap.end,i],col="gray") }

## Add line
lines(years,data[gap.start:gap.end,which(colnames(data)=="Portugal")],lwd=2,col="black")

# Add grid
abline(v=5,lty="dotted",lwd=2)
legend("bottomright",legend=c("Portugal","control countries"),
lty=c(1,1),col=c("black","gray"),lwd=c(2,1),cex=.8)
arrows(4,-10,5,-10,col="black",length=.1)
text(3.5,-10,"Quota Law",cex=Cex.set)

dev.off()

############################
### Code for matching ######
### Tables A1.1 and A1.2 ###
############################

library(MatchIt)

##Matching: Belgium
load("matching_belgium.RData") 
##data include nearest country-election-years to pre-quota belgium (1991)

nearest.match<-matchit(formula=belg ~ PR + lp_catho80 + womenpar + wlf + gni2 + w_s_partyquota, data=belgium, method="nearest", distance="mahalanobis")
nearest.match$match.matrix
#matches to Austria (row.name 65)

##to make Table A1.1
vars<-c("countryname","year", "womenpar", "gni2", "PR", "wlf", "lp_catho80", "w_s_partyquota")
X2<-belgium[,vars]
fix(X2)

################

##Matching: Portugal
load("matching_portugal.RData") 

##data include nearest country-election-years to pre-quota portugal (2005)

nearest.match<-matchit(formula=port ~ PR + lp_catho80 + womenpar + wlf + gni2 + w_s_partyquota, data=noq2, method="nearest", distance="mahalanobis")
nearest.match$match.matrix
## matches to Italy (row.name 654)

##to make Table A1.2
vars<-c("countryname","year", "womenpar", "gni2", "PR", "wlf", "lp_catho80", "w_s_partyquota")
X3<-noq2[,vars]
fix(X3)



###########################
###########################
### Chapter 3 #############
###########################
###########################

library(foreign)
library(ggplot2) 
library(collapse) 

################
########### Figure 2.1
################

### Role of Goverment data, 1985 to 2006

load("ISSP_all.RData")
head(dat)
table(dat$round, dat$year)

table(dat$V6)

model1<-lm(Responsibility_job ~ gender + factor(round) + factor(V6), data=dat, weights = weight)
summary(model1)
confint(model1) 

model2<-lm(Responsibility_control_prices ~ gender + factor(round) + factor(V6), data=dat, weights = weight)
summary(model2)
confint(model2)

model3<-lm(Spending_health ~ gender + factor(round) + factor(V6), data=dat, weights = weight)
summary(model3)
confint(model3)

model4<-lm(Responsibility_inequality ~ gender + factor(round) + factor(V6), data=dat, weights = weight)
summary(model4)
confint(model4)

model5<-lm(Spending_retirement ~ gender + factor(round) + factor(V6), data=dat, weights = weight)
summary(model5)
confint(model5)

model6<-lm(Spending_unemployment ~ gender + factor(round) + factor(V6), data=dat, weights = weight)
summary(model6)
confint(model6)

model7<-lm(Spending_education ~ gender + factor(round) + factor(V6), data=dat, weights = weight)
summary(model7)
confint(model7)

model8<-lm(Spending_law_enf ~ gender + factor(round) + factor(V6), data=dat, weights = weight)
summary(model8)
confint(model8)

model9<-lm(Spending_defense ~ gender + factor(round) + factor(V6), data=dat, weights = weight)
summary(model9)
confint(model9)

model10<-lm(Spending_culture ~ gender + factor(round) + factor(V6), data=dat, weights = weight)
summary(model10)
confint(model10)

model11<-lm(Cut_spending ~ gender + factor(round) + factor(V6), data=dat, weights = weight)
summary(model11)
confint(model11)

model12<-lm(Spending_environment ~ gender + factor(round) + factor(V6), data=dat, weights = weight)
summary(model12)
confint(model12)

### Load the Family and Changing Gender Roles data  

load("ISSP_gen_all.RData")

model30 <- lm(prek_child ~ gender + factor(round) + factor(cntry), data=gen_all, weights = weight)
summary(model30)
confint(model30) 
model31 <- lm(working_warm ~ gender + factor(round) + factor(cntry), data=gen_all, weights = weight)
summary(model31)
confint(model31)
model33 <- lm(women_prefer_home ~ gender + factor(round) + factor(cntry), data=gen_all, weights = weight)
summary(model33)
confint(model33)
model36 <- lm(gendered_jobs ~ gender + factor(round) + factor(cntry), data=gen_all, weights = weight)
summary(model36)
confint(model36)

###put these into a csv to plot

mydata4 = read.csv("Gender_Gaps_Regression_Output2.csv")
head(mydata4)

###subset to first 12 issues

mydata4 <- mydata4[ which(mydata4$Model<13 | mydata4$Model ==30 
| mydata4$Model ==31
| mydata4$Model ==33
| mydata4$Model ==36),]


library(ggplot2)
library(gridExtra)

## reorder data

mydata_o=mydata4[order(mydata4$Coefficient),]
mydata_o$Model=factor(mydata_o$Model,levels=mydata_o$Model)
mydata_o$Q=factor(mydata_o$Q,levels=mydata_o$Q)

tiff("Fig3_1.tiff", width = 10, height = 5, units = 'in', res = 1200)

bp<-ggplot(mydata_o, aes( x = factor(Model), y = Coefficient)) +
  geom_point(size = 3)+ geom_hline(yintercept = 0)+ theme_classic() + 
  geom_errorbar(aes(ymax = UB, ymin = LB), size=1, width=0) + 
  xlab("") + ylab("")  + scale_x_discrete(labels= mydata_o$Q) + scale_y_continuous(breaks = seq(-.1, .12, by = .01)) +
  stat_summary(fun.y="mean", geom="point", shape=23, size=3, fill="black") +
  geom_hline(aes(yintercept=0), colour="black", linetype="solid") +
  theme(plot.title = element_text(size = 20, face = "bold", vjust = 1.5),
        axis.title.y = element_text(colour="grey20",size=14,face="bold"),
        axis.text.x = element_text(colour="grey20",size=12,face="bold"),
        axis.text.y = element_text(colour="grey20",size=12,face="bold"),  
        axis.title.x = element_text(colour="grey20",size=14,face="bold"), 
        axis.ticks.y = element_blank())

bp + coord_flip() 
dev.off()


########## PDF

pdf("Fig3_1.pdf", width = 10, height = 5)

bp<-ggplot(mydata_o, aes( x = factor(Model), y = Coefficient)) +
  geom_point(size = 3)+ geom_hline(yintercept = 0)+ theme_classic() + 
  geom_errorbar(aes(ymax = UB, ymin = LB), size=1, width=0) + 
  xlab("") + ylab("")  + scale_x_discrete(labels= mydata_o$Q) + scale_y_continuous(breaks = seq(-.1, .12, by = .01)) +
  stat_summary(fun.y="mean", geom="point", shape=23, size=3, fill="black") +
  geom_hline(aes(yintercept=0), colour="black", linetype="solid") +
  theme(plot.title = element_text(size = 20, face = "bold", vjust = 1.5),
        axis.title.y = element_text(colour="grey20",size=14,face="bold"),
        axis.text.x = element_text(colour="grey20",size=12,face="bold"),
        axis.text.y = element_text(colour="grey20",size=12,face="bold"),  
        axis.title.x = element_text(colour="grey20",size=14,face="bold"), 
        axis.ticks.y = element_blank())

bp + coord_flip() 
dev.off()

################
########### Figure 3.2
################

## country-level gender gaps on maternal employment 


#####################
#####################
## Load the EVS data longitudinal file 
#####################
#####################

library(haven)
dat2<- read_dta("ZA4804.dta")
head(dat2)
nrow(dat2)
## 164 997

## drop first wave 
table(dat2$S002EVS)
dat2 <- dat2[ which(dat2$S002EVS!=1), ]
table(dat2$S002EVS)

## drop some countries out of sample
table(dat2$S003)
table(dat2$S009A) 


dat3 <- dat2[ which(dat2$S009A=="AT" | 
dat2$S009A=="BE" | 
dat2$S009A=="CA" | 
dat2$S009A=="CH" | 
dat2$S009A=="DE-E" | 
dat2$S009A=="DE-W" | 
dat2$S009A=="DK" | 
dat2$S009A=="ES" | 
dat2$S009A=="FR" | 
dat2$S009A=="GB-GBN" | 
dat2$S009A=="IE" | 
dat2$S009A=="IT" | 
dat2$S009A=="NL" | 
dat2$S009A=="NO" | 
dat2$S009A=="PT" | 
dat2$S009A=="SE" | 
dat2$S009A=="US" ), ]
nrow(dat3)
table(dat3$S009A)

## code weight 
table(dat3$S017)
dat3$weight<-dat3$S017

## code matemp
table(dat3$D061)
## (3 and 4 are disagree) 
dat3$matemp<-NA
dat3$matemp[dat3$D061==-1]<-0
dat3$matemp[dat3$D061==1]<-0
dat3$matemp[dat3$D061==2]<-0
dat3$matemp[dat3$D061==3]<-1
dat3$matemp[dat3$D061==4]<-1

table(dat3$matemp)

## code gender 
table(dat3$X001)
dat3$gender<-NA
dat3$gender[dat3$X001==1]<-0
dat3$gender[dat3$X001==2]<-1
table(dat3$gender)

## code education 
table(dat3$X025) ##7 and 8 are higher ed 
dat3$highedu<-NA
dat3$highedu[dat3$X025==1]<-0
dat3$highedu[dat3$X025==2]<-0
dat3$highedu[dat3$X025==3]<-0
dat3$highedu[dat3$X025==4]<-0
dat3$highedu[dat3$X025==5]<-0
dat3$highedu[dat3$X025==6]<-0
dat3$highedu[dat3$X025==7]<-1
dat3$highedu[dat3$X025==8]<-1
table(dat3$highedu)

## code class (income)
table(dat3$X047R)
dat3$lowinc<-NA
dat3$lowinc[dat3$X047R==1]<-1
dat3$lowinc[dat3$X047R==2]<-0
dat3$lowinc[dat3$X047R==3]<-0
table(dat3$lowinc)

dat3$highinc<-NA
dat3$highinc[dat3$X047R==1]<-0
dat3$highinc[dat3$X047R==2]<-0
dat3$highinc[dat3$X047R==3]<-3
table(dat3$highinc)

### Now for each round, calculate means and gaps by country for new analysis 

library(collapse)
test<- collap(dat3, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

###############################################
###############################################
### EVS round 2 ###############################
###############################################
###############################################

## restrict to round 2 
table(dat3$S002EVS)
r1<-dat3[which(dat3$S002EVS==2), ]

### here are the countries 
table(r1$S009A)
# AT     BE     CA   DE-E   DE-W     DK     ES     FR GB-GBN     IE     IT 
#  1460   2792   1730   1336   2101   1030   2637   1002   1484   1000   2018 
#    NL     NO     PT     SE     US 
#  1017   1239   1185   1047   1839 

#### country level means for this round 
table(r1$S009A)
reg<-r1[which(r1$S009A=="AT"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009A=="BE"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

table(r1$S009A)
reg<-r1[which(r1$S009A=="DE-E"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009A=="DE-W"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="DE"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009A=="DK"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

table(r1$S009A)
reg<-r1[which(r1$S009A=="ES"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009A=="FR"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009A=="GB-GBN"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009A=="IE"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009A=="IT"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009A=="NL"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009A=="NO"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009A=="PT"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009A=="SE"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009A=="US"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

###############################################
###############################################
### EVS round 3 ###############################
###############################################
###############################################

## restrict to round 3
table(dat3$S002EVS)
r1<-dat3[which(dat3$S002EVS==3), ]
nrow(r1)

### here are the countries 
table(r1$S009)
#  AT     BE     DE     DK     ES     FR GB-GBN     IE     IT     NL     PT 
#  1522   1912   2036   1023   1200   1615   1000   1012   2000   1003   1000 
#    SE 
#  1015 

reg<-r1[which(r1$S009=="AT"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="BE"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="DE"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="DK"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="ES"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="FR"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="GB-GBN"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="IE"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="IT"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="PT"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="SE"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

###############################################
###############################################
### EVS round 4 ###############################
###############################################
###############################################

## restrict to round 3
table(dat3$S002EVS)
r1<-dat3[which(dat3$S002EVS==4), ]
nrow(r1)

### here are the countries 
table(r1$S009)
# AT     BE     CH     DE     DK     ES     FR GB-GBN     IE     IT     NL 
#  1510   1509   1272   2075   1507   1500   1501   1561   1013   1519   1554 
#    NO     PT     SE 
#  1090   1553   1187 

reg<-r1[which(r1$S009=="AT"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="BE"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

table(r1$S009)
reg<-r1[which(r1$S009=="CH"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="DE"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="DK"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

table(r1$S009)

reg<-r1[which(r1$S009=="ES"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="FR"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

table(r1$S009)

reg<-r1[which(r1$S009=="GB-GBN"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="IE"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

table(r1$S009)

reg<-r1[which(r1$S009=="IT"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="NL"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="NO"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="PT"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$S009=="SE"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

#################################
#################################
##### ISSP data #################
#################################
#################################

load("ISSP_gen_all.RData")

head(gen_all)

###############################################
###############################################
### round 1 ###############################
###############################################
###############################################

## restrict to round 1
table(gen_all$round)

r1<-gen_all[which(gen_all$round==1), ]
nrow(r1)

### here are the countries 
table(r1$cntry)

#       Austria        Germany        Ireland          Italy    Netherlands 
#           972           2994           1005           1028           1737 
# United Kingdom  United States 
#          1307           1414 

reg<-r1[which(r1$cntry=="Austria"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Germany"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Ireland"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Italy"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Netherlands"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="United Kingdom"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="United States"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

###############################################
###############################################
### round 2 ###############################
###############################################
###############################################

## restrict to round 2
table(gen_all$round)

r1<-gen_all[which(gen_all$round==2), ]
nrow(r1)

### here are the countries 
table(r1$cntry)

# Australia        Austria         Canada        Germany        Ireland 
#          1779            977           1440           3421            938 
#         Italy          Japan    Netherlands    New Zealand         Norway 
#          1018           1307           1968           1047           2087 
#         Spain         Sweden United Kingdom  United States 
 #         2494           1272            984           1447 

reg<-r1[which(r1$cntry=="Australia"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Austria"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Canada"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Germany"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Ireland"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Italy"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Japan"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Netherlands"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="New Zealand"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Norway"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Spain"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Sweden"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="United Kingdom"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="United States"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

###############################################
###############################################
### round 3 ###############################
###############################################
###############################################

## restrict to round 3
table(gen_all$round)

r1<-gen_all[which(gen_all$round==3), ]
nrow(r1)

### here are the countries 
table(r1$cntry)

#  Australia        Austria        Belgium        Denmark         France 
#          1352           2047           1360           1379           1903 
#       Germany        Ireland          Japan    Netherlands    New Zealand 
#          1367           1240           1132           1249           1025 
#        Norway       Portugal          Spain         Sweden    Switzerland 
#          1475           1092           2471           1080           1008 
#United Kingdom  United States 
#          1960           1171

reg<-r1[which(r1$cntry=="Australia"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Austria"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Belgium"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Denmark"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="France"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Germany"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Ireland"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Japan"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Netherlands"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="New Zealand"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Norway"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Portugal"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Spain"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Sweden"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Switzerland"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test


reg<-r1[which(r1$cntry=="United Kingdom"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="United States"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test


###############################################
###############################################
### round 4 ###############################
###############################################
###############################################

## restrict to round 4
table(gen_all$round)

r1<-gen_all[which(gen_all$round==4), ]
nrow(r1)

### here are the countries 
table(r1$cntry)

#  Australia        Austria        Belgium         Canada        Denmark 
#          1612           1182           2202            972           1403 
#        France        Germany        Ireland          Japan    Netherlands 
#          2409           1766           1215           1212           1315 
#        Norway       Portugal          Spain         Sweden    Switzerland 
#          1444            898           2595           1060           1237 
#United Kingdom  United States 
#           950           1302 

reg<-r1[which(r1$cntry=="Australia"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Austria"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Belgium"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Canada"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Denmark"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="France"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Germany"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Ireland"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Japan"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Netherlands"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Norway"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Portugal"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test


reg<-r1[which(r1$cntry=="Sweden"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Switzerland"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test


reg<-r1[which(r1$cntry=="United Kingdom"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="United States"), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test


########################################
########################################
#### Make the figure of country-level gaps
########################################
########################################

## load the csv summarizing country level gaps 

f2<-read.csv("matemp_countrylevel_weights.csv")
head(f2) 

## make whole numbers not ratios 

f2$ml<-f2$mlevel*100
f2$fl<-f2$flevel*100
head(f2)
f2$gap2<-f2$fl - f2$ml
head(f2)

## collapse to the country level 

library(plyr)
newf<- ddply(f2, .(countryname), summarize,  gap=mean(gap2), level=mean(fl))

newf

### and input a new column for the label with levels 
### put this data in another csv 

f4<-read.csv("matemp_countrylevel_weights2.csv")
head(f4)

vars<-c("countryname", "var6", "gap2", "mat_emp_flevel")
dat<-na.omit(f4[vars])

library(ggplot2)

tiff("Fig3_2.tiff", width = 10, height = 5, units = 'in', res = 1200)

plot1<-ggplot(dat, aes(reorder(var6, gap2), gap2, label=gap2)) + 
   geom_bar(fill=c("grey"), stat="identity") + coord_flip() +
   ylab("Average Gender Gaps (% Women -- % Men)") + xlab("")  +
  ggtitle("Country (% Women Disagree)") +
  theme(plot.title = element_text(hjust = -.55))  

p1<- plot1 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
    panel.background = element_blank(), axis.line = element_line(colour = "black"), axis.text=element_text(size=18))

FINAL1<- p1 +theme(axis.text.y = element_text(colour="black"),
                         axis.title.x=element_blank(),
          axis.title.y=element_blank() ,axis.ticks=element_blank())

FINAL2<- FINAL1  + 
     theme(plot.title = element_text(lineheight=.8, face="bold", size=18)) 
FINAL2
dev.off()
## ggsave("country_level_weights_evs_issp.pdf", width = 12, height = 7)

########################### PDF


pdf("Fig3_2.pdf", width = 10, height = 5)

plot1<-ggplot(dat, aes(reorder(var6, gap2), gap2, label=gap2)) + 
   geom_bar(fill=c("grey"), stat="identity") + coord_flip() +
   ylab("Average Gender Gaps (% Women -- % Men)") + xlab("")  +
  ggtitle("Country (% Women Disagree)") +
  theme(plot.title = element_text(hjust = -.55))  

p1<- plot1 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
    panel.background = element_blank(), axis.line = element_line(colour = "black"), axis.text=element_text(size=18))

FINAL1<- p1 +theme(axis.text.y = element_text(colour="black"),
                         axis.title.x=element_blank(),
          axis.title.y=element_blank() ,axis.ticks=element_blank())

FINAL2<- FINAL1  + 
     theme(plot.title = element_text(lineheight=.8, face="bold", size=18)) 
FINAL2
dev.off()


################
########### Gender gaps over time 
################
gen_sub1<-gen_all[ which(gen_all$round==1),]
gen_sub2<-gen_all[ which(gen_all$round==2),]
gen_sub3<-gen_all[ which(gen_all$round==3),]
gen_sub4<-gen_all[ which(gen_all$round==4),]

nrow(gen_sub1)
model_1 <- lm(prek_child ~ gender + factor(cntry), data=gen_sub1, weights = weight)
summary(model_1)
confint(model_1)

model_2 <- lm(prek_child ~ gender + factor(cntry), data=gen_sub2, weights = weight)
summary(model_2)
confint(model_2)

model_3 <- lm(prek_child ~ gender + factor(cntry), data=gen_sub3, weights = weight)
summary(model_3)
confint(model_3)

model_4 <- lm(prek_child ~ gender + factor(cntry), data=gen_sub4, weights = weight)
summary(model_4)
confint(model_4)

### what are the levels of support for each round

test<- collap(gen_sub1, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

test<- collap(gen_sub2, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

test<- collap(gen_sub3, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

test<- collap(gen_sub4, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

#################################
#################################
#### Gender gaps by education ###
#################################
#################################

table(gen_all$high_ed)
reg<-gen_all[which(gen_all$high_ed==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

table(gen_all$high_ed)
reg<-gen_all[which(gen_all$high_ed==0), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

##################################
##################################
#### Figure 3.3 ##################
##################################
##################################

table(gen_all$left_c)
table(gen_all$left_all)
gen_all$left_far<-gen_all$left_all
gen_all$left_far[gen_all$left_c==1]<-0
table(gen_all$left_far)

table(gen_all$right_c)
table(gen_all$right_all)
gen_all$right_far<-gen_all$right_all
gen_all$right_far[gen_all$right_c==1]<-0
table(gen_all$right_far)

reg<-gen_all[which(gen_all$left_far==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-gen_all[which(gen_all$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-gen_all[which(gen_all$right_far==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-gen_all[which(gen_all$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

##### looking at far right and high ed
reg<-gen_all[which(gen_all$right_far==1 & gen_all$high_ed==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test 

gaps = read.csv("party_level_broad_weights.csv")
head(gaps) 

gaps$ml<-gaps$mlevel*100
gaps$fl<-gaps$flevel*100
gaps$gap2<-gaps$fl-gaps$ml
head(gaps)

vars<-c("party", "var6", "gap2", "fl")
dat<-na.omit(gaps[vars])

library(ggplot2)

tiff("Fig3_3.tiff", width = 10, height = 5, units = 'in', res = 1200)

plot1<-ggplot(dat, aes(reorder(var6, gap2), gap2, label=gap2)) + 
   geom_bar(fill=c("grey"), stat="identity") + coord_flip() +
   ylab("Average Gender Gaps (% Women -- % Men)") + xlab("")  +
  ggtitle("Party (% Women Disagree)") +
  theme(plot.title = element_text(hjust = -.4))  

p1<- plot1 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
    panel.background = element_blank(), axis.line = element_line(colour = "black"), axis.text=element_text(size=18))

FINAL1<- p1 +theme(axis.text.y = element_text(colour="black"),
                         axis.title.x=element_blank(),
          axis.title.y=element_blank() ,axis.ticks=element_blank())

FINAL2<- FINAL1  + 
     theme(plot.title = element_text(lineheight=.8, face="bold", size=18))
FINAL2 
dev.off()

############################## PDF  

pdf("Fig3_3.pdf", width = 10, height = 5)

plot1<-ggplot(dat, aes(reorder(var6, gap2), gap2, label=gap2)) + 
   geom_bar(fill=c("grey"), stat="identity") + coord_flip() +
   ylab("Average Gender Gaps (% Women -- % Men)") + xlab("")  +
  ggtitle("Party (% Women Disagree)") +
  theme(plot.title = element_text(hjust = -.4))  

p1<- plot1 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
    panel.background = element_blank(), axis.line = element_line(colour = "black"), axis.text=element_text(size=18))

FINAL1<- p1 +theme(axis.text.y = element_text(colour="black"),
                         axis.title.x=element_blank(),
          axis.title.y=element_blank() ,axis.ticks=element_blank())

FINAL2<- FINAL1  + 
     theme(plot.title = element_text(lineheight=.8, face="bold", size=18))
FINAL2 
dev.off()


#### Further analysis by party using ISSP 2002 data 

## restrict to round 3
table(gen_all$round)

r1<-gen_all[which(gen_all$round==3), ]
nrow(r1)

### here are the countries 
table(r1$cntry)

#  Australia        Austria        Belgium        Denmark         France 
#          1352           2047           1360           1379           1903 
#       Germany        Ireland          Japan    Netherlands    New Zealand 
#          1367           1240           1132           1249           1025 
#        Norway       Portugal          Spain         Sweden    Switzerland 
#          1475           1092           2471           1080           1008 
#United Kingdom  United States 
#          1960           1171

library(collapse)

reg<-r1[which(r1$cntry=="Australia" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Australia" & r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

table(reg$left_c)
reg<-r1[which(r1$cntry=="Austria" & r1$left_c==1), ]
nrow(reg) ## can't do Austria 

reg<-r1[which(r1$cntry=="Belgium"& r1$left_c==1), ]
nrow(reg) ## or Belgium 

reg<-r1[which(r1$cntry=="Denmark" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Denmark" & r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="France" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="France" & r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Germany"& r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Germany"& r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Ireland" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Ireland" & r1$right_c==1), ]
nrow(reg)
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test


reg<-r1[which(r1$cntry=="Japan" & r1$left_c==1), ]
nrow(reg) ## can't do japan 

reg<-r1[which(r1$cntry=="Netherlands" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Netherlands" & r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test ## N is too small 

reg<-r1[which(r1$cntry=="New Zealand" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="New Zealand" & r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test


reg<-r1[which(r1$cntry=="Norway" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Norway" & r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Portugal" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Portugal" & r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Spain" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test


reg<-r1[which(r1$cntry=="Spain" & r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Sweden" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Sweden" & r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Switzerland" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="Switzerland" & r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="United Kingdom" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test


reg<-r1[which(r1$cntry=="United Kingdom" & r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test


reg<-r1[which(r1$cntry=="United States" & r1$left_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

reg<-r1[which(r1$cntry=="United States" & r1$right_c==1), ]
nrow(reg) ## check that nrow matches 
test<- collap(reg, prek_child~ gender, fmean, w = ~ weight, keep.w = FALSE)
test


##### CSES data #####
##### for analysis of priorities ####


dat <- read.csv("cses3.csv")
head(dat)
nrow(dat) ##

## The variables are: 
#C2002         >>> D2.  GENDER OF RESPONDENT
#  1. MALE
 #            2. FEMALE

table(dat$C2002)

dat$female<-NA
dat$female[dat$C2002==1]<-0
dat$female[dat$C2002==2]<-1
table(dat$female)

table(dat$C1010_1)

dat$weight<-dat$C1010_1


## C1006_NAM     >>> ID COMPONENT - POLITY NAME
table(dat$C1006_NAM)

dat$countryname<-dat$C1006_NAM
table(dat$countryname)

### | NOTES: C3002_1-C3002_2 -- note that the second one of these is "facing the country"
### I will use most important to you personally 

dat$family<-0
dat$family2<-0

## restrict to 15 included of the 19 countries I include in other analysis which have data
## and note further exlusion of countries bc either offer closed list of categories
## and do not include work-family issues, or code responses afterwards, and work-family is not included
##  (might be in eg social policies)
## : Australia, France, Germany, Ireland, New Zealand, USA

## 9 countries are included 

nrow(dat)
dat2<-dat[which(dat$countryname=="Canada" | dat$countryname=="Denmark" |
 dat$countryname=="Japan" |
dat$countryname=="Netherlands" | 
dat$countryname=="Norway" | dat$countryname=="Portugal" |
dat$countryname=="Spain" | dat$countryname=="Sweden" |
dat$countryname=="Switzerland" ), ]

nrow(dat2)
head(dat2)
table(dat2$countryname)
nrow(dat2)

### Need to code a binary variable for whether the respondent mentions
### work-family issues as the most importand issue 
### one var for most imp, another if mentioned as 1st or 2nd imp 

### Do this by country, because the coding processes vary 
### see the election study notes here 
### file:///C:/Users/acw64/Downloads/ZA5181_cdb_2_variables.pdf

  ##       Q1a.  What has been the most important issue to you personally
   ##      in this election?
     ##    Q1b.  What has been the second most important issue to you 
       ##  personally in this election?               
 ##        ..................................................................

   ##          001.-899. MOST IMPORTANT PROBLEM CODES 
  ##                     [SEE ELECTION STUDY NOTES]

  ##           900.      OTHER PROBLEM (NOT SPECIFIABLE)
  ##           901.      NO PROBLEM

   ##          997.      VOLUNTEERED: REFUSED
   ##          998.      VOLUNTEERED: DON'T KNOW

   ##          999.      MISSING

table(dat$C3001_1)

table(dat$C3001_2)


### 9 countries 


#### Canada
 
table(dat2$C3001_1[dat2$countryname=="Canada"], dat2$female[dat2$countryname=="Canada"]) 
table(dat2$C3001_2[dat2$countryname=="Canada"], dat2$female[dat2$countryname=="Canada"]) 
### 2nd most imp is not asked 

062.     Family benefits, childcare funding & programs


dat$family[dat$countryname=="Canada" & dat$C3001_1==62]<-1
table(dat$family)
table(dat$family, dat$female) 
##bottom row shows gender gap in priorities

dat$family2[dat$countryname=="Canada" & dat$C3001_1==62]<-1
table(dat$family2)
table(dat$family2, dat$female)

### Denmark --  
table(dat$C3001_1[dat$countryname=="Denmark"], dat$female[dat$countryname=="Denmark"]) 
table(dat$C3001_2[dat$countryname=="Denmark"], dat$female[dat$countryname=="Denmark"]) 

 ## 132.     Families with children/ child care

dat$family[dat$countryname=="Denmark" & dat$C3001_1==132]<-1
table(dat$family)
table(dat$family, dat$female) 
table(dat$family, dat$countryname) 

##bottom row shows gender gap in priorities

dat$family2[dat$countryname=="Denmark" & dat$C3001_1==132]<-1
dat$family2[dat$countryname=="Denmark" & dat$C3001_2==132]<-1
table(dat$family2)
table(dat$family2, dat$female)
table(dat$family2, dat$countryname)

### Japan 
table(dat$C3001_1[dat$countryname=="Japan"], dat$female[dat$countryname=="Japan"]) 
table(dat$C3001_2[dat$countryname=="Japan"], dat$female[dat$countryname=="Japan"])

### 22 birth dearth 

dat$family[dat$countryname=="Japan" & dat$C3001_1==22]<-1
table(dat$family)
table(dat$family, dat$female) 
table(dat$family, dat$countryname) 

##bottom row shows gender gap in priorities

dat$family2[dat$countryname=="Japan" & dat$C3001_1==22]<-1
dat$family2[dat$countryname=="Japan" & dat$C3001_2==22]<-1
table(dat$family2)
table(dat$family2, dat$female)
table(dat$family2, dat$countryname)

#### Netherlands
table(dat$C3001_1[dat$countryname=="Netherlands"], dat$female[dat$countryname=="Netherlands"]) 
table(dat$C3001_2[dat$countryname=="Netherlands"], dat$female[dat$countryname=="Netherlands"])

###104.     Family policy / childcare

dat$family[dat$countryname=="Netherlands" & dat$C3001_1==104]<-1
table(dat$family)
table(dat$family, dat$female) 
table(dat$family, dat$countryname) 

##bottom row shows gender gap in priorities

dat$family2[dat$countryname=="Netherlands" & dat$C3001_1==104]<-1
dat$family2[dat$countryname=="Netherlands" & dat$C3001_2==104]<-1
table(dat$family2)
table(dat$family2, dat$female)
table(dat$family2, dat$countryname)

### Norway 
table(dat$C3001_1[dat$countryname=="Norway"], dat$female[dat$countryname=="Norway"]) 
table(dat$C3001_2[dat$countryname=="Norway"], dat$female[dat$countryname=="Norway"])

# 031.     Kindergartens
 #        |       032.     Cash benefit for families with small children
  #       |       035.     (Other) child and family issues

dat$family[dat$countryname=="Norway" & dat$C3001_1==31]<-1
dat$family[dat$countryname=="Norway" & dat$C3001_1==32]<-1
dat$family[dat$countryname=="Norway" & dat$C3001_1==35]<-1
table(dat$family)
table(dat$family, dat$female) 
table(dat$family, dat$countryname) 

##bottom row shows gender gap in priorities

dat$family2[dat$countryname=="Norway" & dat$C3001_1==31]<-1
dat$family2[dat$countryname=="Norway" & dat$C3001_2==31]<-1
dat$family2[dat$countryname=="Norway" & dat$C3001_1==32]<-1
dat$family2[dat$countryname=="Norway" & dat$C3001_2==32]<-1
dat$family2[dat$countryname=="Norway" & dat$C3001_1==35]<-1
dat$family2[dat$countryname=="Norway" & dat$C3001_2==35]<-1
table(dat$family2)
table(dat$family2, dat$female)
table(dat$family2, dat$countryname)


### Portugal
table(dat$C3001_1[dat$countryname=="Portugal"], dat$female[dat$countryname=="Portugal"]) 
table(dat$C3001_2[dat$countryname=="Portugal"], dat$female[dat$countryname=="Portugal"])

### 018.     Support for the elderly, children and other 
##         |                groups

dat$family[dat$countryname=="Portugal" & dat$C3001_1==18]<-1
table(dat$family)
table(dat$family, dat$female) 
table(dat$family, dat$countryname) 

##bottom row shows gender gap in priorities

dat$family2[dat$countryname=="Portugal" & dat$C3001_1==18]<-1
dat$family2[dat$countryname=="Portugal" & dat$C3001_2==18]<-1

table(dat$family2)
table(dat$family2, dat$female)
table(dat$family2, dat$countryname)

#### Spain
table(dat$C3001_1[dat$countryname=="Spain"], dat$female[dat$countryname=="Spain"]) 
table(dat$C3001_2[dat$countryname=="Spain"], dat$female[dat$countryname=="Spain"])

table(dat$C3001_1[dat$C1006_NAM=="Spain"], dat$female[dat$C1006_NAM=="Spain"]) 

### 016.     Family policy

dat$family[dat$countryname=="Spain" & dat$C3001_1==16]<-1
table(dat$family)
table(dat$family, dat$female) 
table(dat$family, dat$countryname) 

dat$family2[dat$countryname=="Spain" & dat$C3001_1==16]<-1
dat$family2[dat$countryname=="Spain" & dat$C3001_2==16]<-1

table(dat$family2)
table(dat$family2, dat$female) 
table(dat$family2, dat$countryname) 


#### Sweden
table(dat$C3001_1[dat$countryname=="Sweden"], dat$female[dat$countryname=="Sweden"]) 
table(dat$C3001_2[dat$countryname=="Sweden"], dat$female[dat$countryname=="Sweden"])

# 043.     Care
# 085.     Family policy / child care
 #        |       086.     Child-care allowance
 #        |       089.     Parents' insurance
  #       |       092.     Daycare centre

dat$family[dat$countryname=="Sweden" & dat$C3001_1==43]<-1
dat$family[dat$countryname=="Sweden" & dat$C3001_1==85]<-1
dat$family[dat$countryname=="Sweden" & dat$C3001_1==86]<-1
dat$family[dat$countryname=="Sweden" & dat$C3001_1==89]<-1
dat$family[dat$countryname=="Sweden" & dat$C3001_1==92]<-1
table(dat$family)
table(dat$family, dat$female) 
table(dat$family, dat$countryname) 

dat$family2[dat$countryname=="Sweden" & dat$C3001_1==43]<-1
dat$family2[dat$countryname=="Sweden" & dat$C3001_2==43]<-1
dat$family2[dat$countryname=="Sweden" & dat$C3001_1==85]<-1
dat$family2[dat$countryname=="Sweden" & dat$C3001_2==85]<-1
dat$family2[dat$countryname=="Sweden" & dat$C3001_1==86]<-1
dat$family2[dat$countryname=="Sweden" & dat$C3001_2==86]<-1
dat$family2[dat$countryname=="Sweden" & dat$C3001_1==89]<-1
dat$family2[dat$countryname=="Sweden" & dat$C3001_2==89]<-1
dat$family2[dat$countryname=="Sweden" & dat$C3001_1==92]<-1
dat$family2[dat$countryname=="Sweden" & dat$C3001_2==92]<-1
table(dat$family2)
table(dat$family2, dat$female) 
table(dat$family2, dat$countryname) 

#### Switzerland
table(dat$C3001_1[dat$countryname=="Switzerland"], dat$female[dat$countryname=="Switzerland"]) 
table(dat$C3001_2[dat$countryname=="Switzerland"], dat$female[dat$countryname=="Switzerland"])
## didn't really ask 2nd most imp 

## 046.     Family policy

dat$family[dat$countryname=="Switzerland" & dat$C3001_1==46]<-1
table(dat$family)
table(dat$family, dat$female) 
table(dat$family, dat$countryname) 

dat$family2[dat$countryname=="Switzerland" & dat$C3001_1==46]<-1
table(dat$family2)
table(dat$family2, dat$female) 
table(dat$family2, dat$countryname) 


###############################
###############################
### restrict to only those countries where family option was included 

nrow(dat)
dat2<-dat[which(dat$countryname=="Canada" | dat$countryname=="Denmark" |
 dat$countryname=="Japan" |
dat$countryname=="Netherlands" | 
dat$countryname=="Norway" | dat$countryname=="Portugal" |
dat$countryname=="Spain" | dat$countryname=="Sweden" |
dat$countryname=="Switzerland" ), ]

nrow(dat2)
head(dat2)
table(dat2$countryname)

table(dat2$family, dat2$female) ### 131 men and 305 women (70% of those who said this were women)
table(dat2$family2, dat2$female) ### 230 men and 487 women (68%)
## 

## take out the missing / other -- 
##900. OTHER PROBLEM (NOT SPECIFIABLE)
## 901. NO PROBLEM
## 997. VOLUNTEERED: REFUSED
## 998. VOLUNTEERED: DON'T KNOW
## 999. MISSING

table(dat2$C3001_1)
nrow(dat2)
dat3<-dat2[ which(dat2$C3001_1<900), ]
nrow(dat3) ## 

##############  cross-tabs

library(dplyr)
count(x = dat3, family, wt = weight)

library(expss)
    
    fre(dat3$family, weight = dat3$weight) ##2.4 % note slightly different due to weights
    fre(dat3$family2, weight = dat3$weight) ## 3.9 %


    fre(dat3$family[dat3$female==1], weight = dat3$weight[dat3$female==1]) ##3.2\% of women
    fre(dat3$family[dat3$female==0], weight = dat3$weight[dat3$female==0]) ##1.4\% of men

    fre(dat3$family2[dat3$female==1], weight = dat3$weight[dat3$female==1]) ##5.2\% of women
    fre(dat3$family2[dat3$female==0], weight = dat3$weight[dat3$female==0]) ##2.6\% of men

#### Among those who list work-family policy how many are women 
    fre(dat3$female[dat3$family==1], weight = dat3$weight[dat3$family==1]) ##70% women
    fre(dat3$female[dat3$family2==1], weight = dat3$weight[dat3$family2==1]) ##67% women

table(dat3$female) ### numbers of men and women

##########################
######## Factor analysis #
##########################

load("factor_analysis.RData")
nrow(newdat) ## Dataset of 3 survey waves, 16 countries
head(newdat)
##subset to relevant variables 

table(newdat$S002EVS)

###this is the weight 
summary(newdat$S017)
newdat$weight<-newdat$S017

vars<-c("state_responsibility", "govt_ownership", "competition", 
"child_suffers", "working_mom_relationship", "job_ok")
dat3<-na.omit(newdat[vars])
nrow(dat3)
head(dat3)

library(Hmisc)
label(dat3$state_responsibility) <- "State responsibility"
label(dat3$govt_ownership) <- "Government ownership"
label(dat3$competition) <- "Competition"
label(dat3$child_suffers) <- "PreK child suffers"
label(dat3$working_mom_relationship) <- "Working mom relationship"
label(dat3$job_ok) <- "Women: job vs. home"

describe(dat3)

##Split data into one set for EFA, one for CFA 

## 50% of the sample size
smp_size <- floor(0.5 * nrow(dat3))
## set the seed to make your partition reproductible
set.seed(123)
train_ind <- sample(seq_len(nrow(dat3)), size = smp_size)
train <- dat3[train_ind, ]
test <- dat3[-train_ind, ]
nrow(train)
nrow(test)

################
################
################
##PCA
################
################
################

library(FactoMineR)
library(factoextra)

result <- PCA(train)

plot.PCA(result, axes=c(1, 2), choix="var") +
  theme_minimal() 

# compute PCA
set.seed(02345)
res.pca <- PCA(train)
#
# extract some parts for plotting
PC1 <- res.pca$ind$coord[,1]
PC2 <- res.pca$ind$coord[,2]
labs <- rownames(res.pca$ind$coord)
PCs <- data.frame(cbind(PC1,PC2))
rownames(PCs) <- labs
#
# Just showing the individual samples...
ggplot(PCs, aes(PC1,PC2, label=rownames(PCs))) + 
  geom_text() 
#
# Now get supplementary categorical variables
cPC1 <- res.pca$quali.sup$coor[,1]
cPC2 <- res.pca$quali.sup$coor[,2]
clabs <- rownames(res.pca$quali.sup$coor)
cPCs <- data.frame(cbind(cPC1,cPC2))
rownames(cPCs) <- clabs
colnames(cPCs) <- colnames(PCs)
#
# Put samples and categorical variables (ie. grouping
# of samples) all together
p <- ggplot() + theme(aspect.ratio=1) + theme_bw(base_size = 20) 
# no data so there's nothing to plot...
# add on data 
p <- p + geom_text(data=PCs, aes(x=PC1,y=PC2,label=rownames(PCs)), size=4) 
p <- p + geom_text(data=cPCs, aes(x=cPC1,y=cPC2,label=rownames(cPCs)),size=10)
p # show plot with both layers
#
# clear the plot
dev.off()
#
# Now extract variables
#
vPC1 <- res.pca$var$coord[,1]
vPC2 <- res.pca$var$coord[,2]
vlabs <- rownames(res.pca$var$coord)
vPCs <- data.frame(cbind(vPC1,vPC2))
# rownames(vPCs) <- vlabs
rownames(vPCs) <- c("State responsibility", "Government ownership", "Competition", "PreK child suffers", "Working mom relationship", "Women: job vs. home")
colnames(vPCs) <- colnames(PCs)
#
# and plot them
#
pv <- ggplot() + theme(aspect.ratio=1) + theme_bw(base_size = 20) 
# no data so there's nothing to plot
# put a faint circle there, as is customary
angle <- seq(-pi, pi, length = 50) 
df <- data.frame(x = sin(angle), y = cos(angle)) 
pv <- pv + geom_path(aes(x, y), data = df, colour="grey70") 
#
# add on arrows and variable labels
pv <- pv + geom_text(data=vPCs, aes(x=vPC1,y=vPC2,label=rownames(vPCs)), size=4, ,position=position_jitter(width=.2,height=.2)) + xlab("Dim 1 (28.8%)") + ylab("Dim 2 (26.4%)")
pv <- pv + geom_segment(data=vPCs, aes(x = 0, y = 0, xend = vPC1*0.9, yend = vPC2*0.9), arrow = arrow(length = unit(1/2, 'picas')), color = "grey30")
pv # show plot
pv +  theme_minimal() + ggtitle("Exploratory Factor Analysis: Variables Factor Map")


## http://factominer.free.fr/factomethods/principal-components-analysis.html


#####################
##### Table 3.2 ######
######################

result$var$coord ##factor loadings on 5 dimensions

eigenvalues <- result$eig
head(eigenvalues[, 1:2])


###scree plot
##shows that eigenvalues drop after second factor 

Scree.Plot <- function(R,main="Scree Plot",sub=NULL){
  roots <- eigen(R)$values
  x <- 1:dim(R)[1]
  plot(x,roots,type="b",col='blue',ylab="Eigenvalue",
       xlab="Component Number",main=main,sub=sub) 
  abline(h=1,lty=2,col="red")
  
}


R <- cor(train)

Scree.Plot(R,main ="Scree Plot")

##supports two dimensions

################
################
##EFA (using half dataset)
################
################

library(polycor)
poly2<-hetcor(train) ##polychoric matrix

fa <- factanal(train, factor=2)
fa
##results hold up to maximum likelihood extraction

# plot factor 1 by factor 2 

load <- fa$loadings[,1:2] 
plot(load,type="p", pch=16, xlim=c(-.1, 1), ylim=c(-.1, 0.7)
) # set up plot 
text(load, labels=names(train) , cex=.7, pos=3)
abline(h=0, v=0)

##use psych package to do polychoric correlation matrix
## (as a check)

library(psych)
faPCdirect <- fa.poly(train, nfactors=2, rotate="Promax", fm="pc")    # polychoric FA
faPCdirect$fa$loadings   
factor.plot(faPCdirect$fa)
fa.diagram(faPCdirect)

################
################
##### Table 2.3: CFA 
## using other half of data 
################
################

library(lavaan)

HS.model <- 'left_right =~ state_responsibility + govt_ownership + competition
              maternal_emp =~ child_suffers + working_mom_relationship + job_ok'
fit <- cfa(HS.model, data = test)
summary(fit, fit.measures = TRUE)
standardizedsolution(fit) ##see loadings when not constrained 
#inspect(fit, "coefficients")$psi 
#covariance factors

##covariance is .02, RMSEA is 0.02, CFI is 0.99 and TLI is 0.99

##################
##################
## CFA Robustness Checks
##################
##################

vars<-c("state_responsibility", "govt_ownership", "competition", 
"child_suffers", "working_mom_relationship", "job_ok", "S002EVS")
dat3<-na.omit(newdat[vars])
nrow(dat3)

##Split data into one set for EFA, one for CFA 

## 50% of the sample size
smp_size <- floor(0.5 * nrow(dat3))
## set the seed to make your partition reproductible
set.seed(123)
train_ind <- sample(seq_len(nrow(dat3)), size = smp_size)
train <- dat3[train_ind, ]
test <- dat3[-train_ind, ]
nrow(train)
nrow(test)

##by survey wave -- can test iteratively

table(test$S002EVS)
X2 <- test[ which(test$S002EVS=="1990-1993"), ]

HS.model <- 'left_right =~ state_responsibility + govt_ownership + competition
              maternal_emp =~ child_suffers + working_mom_relationship + job_ok'
fit <- cfa(HS.model, data = X2)
summary(fit, fit.measures = TRUE)
standardizedsolution(fit) ##see loadings when not constrained 

### repeat for other waves
#X2 <- test[ which(test$S002EVS=="1999-2001"), ]
#X2 <- test[ which(test$S002EVS=="2008-2010"), ]

####################
####################
###By country 

vars<-c("state_responsibility", "govt_ownership", "competition", 
"child_suffers", "working_mom_relationship", "job_ok", "S002EVS", "S003")
dat4<-na.omit(newdat[vars])
nrow(dat4)

##Split data into one set for EFA, one for CFA 

## 50% of the sample size
smp_size <- floor(0.5 * nrow(dat4))
## set the seed to make your partition reproductible
set.seed(123)
train_ind <- sample(seq_len(nrow(dat4)), size = smp_size)
train <- dat4[train_ind, ]
test <- dat4[-train_ind, ]
nrow(train)
nrow(test)

X3 <- test[ which(test$S003=="United States"), ]
nrow(X3)
table(X3$S002EVS)

##repeat for all countries: Austria, Belgium, Denmark, Finland, France, Germany 
##Greece, Ireland, Iceland, Italy, Lux, Netherlands, Portugal, Spain, Sweden, GB
##Norway, Switzerland, US

HS.model <- 'left_right =~ state_responsibility + govt_ownership + competition
              maternal_emp =~ child_suffers + working_mom_relationship + job_ok'
fit <- cfa(HS.model, data = X3)
summary(fit, fit.measures = TRUE)
standardizedsolution(fit) ##see loadings when not constrained 

##################
## Figure 3.4:  Marginal effects of gender on preferences within parties 
##################

### open 2002 FCG data and code it 

library(foreign)
mydata <- read.dta("ZA3880.dta")
head(mydata)
nrow(mydata)

table(mydata$COUNTRY) 
### Drop non-OECD
mydata <- subset(mydata, COUNTRY == "Australia (AU)" | COUNTRY == "Germany (West) (DE-W)" | COUNTRY == "Germany (East) (DE-E)" |  COUNTRY == "Great Britain (GB-GBN)" 
 | COUNTRY == "United States (US)" |  COUNTRY == "Austria (AT)"  | COUNTRY == "Ireland (IE)" |  COUNTRY == "Netherlands (NL)"  
| COUNTRY == "Norway (NO)" |  COUNTRY == "Sweden (SE)"  | COUNTRY == "New Zealand (NZ)" |  COUNTRY == "Japan (JP)"  
| COUNTRY == "Spain (ES)" |  COUNTRY == "France (FR)"  | COUNTRY == "Portugal (PT)" | COUNTRY ==  "Denmark (DK)"  | COUNTRY == "Switzerland (CH)" |  COUNTRY == "Belgium/ Flanders (BE-FLA)" )

table(mydata$COUNTRY) 

## weight is v361
mydata$weight<-mydata$v361

##gender is v200, 1 is male, 2 is female
mydata$gender<-NA
mydata$gender[mydata$v200=="Male"]<-0
mydata$gender[mydata$v200=="Female"]<-1
table(mydata$gender)

##age
table(mydata$v201)
mydata$age<-mydata$v201
 
##highedu
table(mydata$v205)
mydata$highedu<-0
mydata$highedu[mydata$v205=="Above higher sec level,below full uni"]<-1
mydata$highedu[mydata$v205=="University degree completed"]<-1
table(mydata$highedu)

##
table(mydata$v253)
mydata$lr<-NA
mydata$lr[mydata$v253=="Right,conservative"]<-0
mydata$lr[mydata$v253=="Far right etc"]<-0
mydata$lr[mydata$v253=="Far left etc"]<-1
mydata$lr[mydata$v253=="Left,center left"]<-1
table(mydata$lr)


##
table(mydata$v244)
mydata$supervise<-0
mydata$supervise[mydata$v244=="Yes,supervises"]<-1
table(mydata$supervise)

##
table(mydata$v242)
mydata$selfemp<-0
mydata$selfemp[mydata$v242=="Self employed"]<-1
table(mydata$selfemp)

table(mydata$v239)
mydata$unemp<-0
mydata$unemp[mydata$v239=="Unemployed"]<-1
table(mydata$unemp)

mydata$publicsec<-0
mydata$publicsec[mydata$v242=="Work f government"]<-1
mydata$publicsec[mydata$v242=="Public owned firm,nat.ind"]<-1
table(mydata$publicsec)

table(mydata$v239)
mydata$parttime<-0
mydata$parttime[mydata$v239=="Employed-part time"]<-1
mydata$parttime[mydata$v239=="Empl-< part-time"]<-1
table(mydata$parttime)

mydata$retired<-0
mydata$retired[mydata$v239=="Retired"]<-1
table(mydata$retired)

mydata$nolab<-0
mydata$nolab[mydata$v239=="Oth,not i labour force"]<-1
table(mydata$nolab)

## do not include class or rural-urban bc don't include in ROG -- let's code it here anyway
## to see if it makes a difference to results 

table(mydata$v358)
mydata$rural<-0
mydata$rural[mydata$v358=="Country village"]<-1
mydata$rural[mydata$v358=="Farm or home in the country"]<-1
table(mydata$rural)

table(mydata$v291)
mydata$lowclass<-0
mydata$lowclass[mydata$v291<5]<-1
table(mydata$lowclass) 

mydata$highclass<-0
mydata$highclass[mydata$v291>6]<-1
table(mydata$highclass) 

table(mydata$v4)
mydata$working_warm<-0
mydata$working_warm[mydata$v4=="Strongly agree"]<-1
mydata$working_warm[mydata$v4=="Agree"]<-1
table(mydata$working_warm)
### agree that working mother can establish just as warm a relationship 

## prek child will suffer is v5 (1 is disagree)
table(mydata$v5)
mydata$matemp<-0
mydata$matemp[mydata$v5=="Disagree"]<-1
mydata$matemp[mydata$v5=="Strongly disagree"]<-1
table(mydata$matemp)

#### family life suffers when women working 
table(mydata$v6)
mydata$family_suffers<-0
mydata$family_suffers[mydata$v6=="Strongly disagree"]<-1
mydata$family_suffers[mydata$v6=="Disagree"]<-1
table(mydata$family_suffers)

#### job is ok but women really want home and kids == disagree
table(mydata$v7)
mydata$job_ok<-0
mydata$job_ok[mydata$v7=="Strongly disagree"]<-1
mydata$job_ok[mydata$v7=="Disagree"]<-1
table(mydata$job_ok)

## housewife fulfilling 
table(mydata$v8)
mydata$fulfill<-0
mydata$fulfill[mydata$v8=="Disagree"]<-1
mydata$fulfill[mydata$v8=="Strongly disagree"]<-1
table(mydata$fulfill)

##work best for women's independence
table(mydata$v9)
mydata$indepen<-0
mydata$indepen[mydata$v9=="Agree"]<-1
mydata$indepen[mydata$v9=="Strongly agree"]<-1
table(mydata$indepen)

## both should contribute to household income
table(mydata$v10)
mydata$bothcon<-0
mydata$bothcon[mydata$v10=="Agree"]<-1
mydata$bothcon[mydata$v10=="Strongly agree"]<-1
table(mydata$bothcon)

## men's job work, women household
table(mydata$v11)
mydata$genderroles<-0
mydata$genderroles[mydata$v11=="Disagree"]<-1
mydata$genderroles[mydata$v11=="Strongly disagree"]<-1
table(mydata$genderroles)

##men should do more household work 
table(mydata$v12)
mydata$menhouse<-0
mydata$menhouse[mydata$v12=="Agree"]<-1
mydata$menhouse[mydata$v12=="Strongly agree"]<-1
table(mydata$menhouse)

## men should do more childcare
table(mydata$v13)
mydata$mencare<-0
mydata$mencare[mydata$v13=="Agree"]<-1
mydata$mencare[mydata$v13=="Strongly agree"]<-1
table(mydata$mencare)

## v27 paid maternity leave
table(mydata$v27)
mydata$matleave<-0
mydata$matleave[mydata$v27=="Agree"]<-1
mydata$matleave[mydata$v27=="Strongly agree"]<-1
table(mydata$matleave)

##v28 benefits for child care 
table(mydata$v28)
mydata$childbens<-0
mydata$childbens[mydata$v28=="Agree"]<-1
mydata$childbens[mydata$v28=="Strongly agree"]<-1
table(mydata$childbens)

##When youngest child is below school age women should
##work full or part time
table(mydata$v15)
mydata$fullpart<-0
mydata$fullpart[mydata$v15=="Work full-time"]<-1
mydata$fullpart[mydata$v15=="Work part-time"]<-1
table(mydata$fullpart)


######## Construct datasets for L and R parties 
mydata_l <- subset(mydata, lr == 1 )
mydata_r <- subset(mydata, lr == 0 )
table(mydata$v253)
table(mydata_l$v253)
table(mydata_r$v253)


###########################
## Analysis within parties 
###########################

library(Zelig)

#### Working mother can have just as warm relationship 


################ Look within L 
set.seed(344425)

zelig.out <- zelig(working_warm ~ gender + age + highedu + 
 supervise + selfemp + unemp + parttime + publicsec + retired + rural + 
nolab + lowclass + highclass,
data = mydata_l, model = "probit", weights = "weight")
summary(zelig.out)

###Female gender
x.low<-setx(zelig.out, gender=0)
x.high<-setx(zelig.out, gender=1)
s.out1 <- sim(zelig.out, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) # 

################ Look within R

zelig.out2 <- zelig(working_warm ~ gender + age + highedu + 
 supervise + selfemp + unemp + parttime + publicsec + retired + rural + 
nolab + lowclass + highclass,
data = mydata_r, model = "probit", weights = "weight")
summary(zelig.out2)

###Female gender
x.low<-setx(zelig.out2, gender=0)
x.high<-setx(zelig.out2, gender=1)
s.out1 <- sim(zelig.out2, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) 


############################
#### prek child will suffer
############################

################ Look within L 

zelig.out <- zelig(matemp ~ gender + age + highedu + 
 supervise + selfemp + unemp + parttime + publicsec + retired + rural + 
nolab + lowclass + highclass,
data = mydata_l, model = "probit", weights = "weight")
summary(zelig.out)

###Female gender
x.low<-setx(zelig.out, gender=0)
x.high<-setx(zelig.out, gender=1)
s.out1 <- sim(zelig.out, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) # 

################ Look within R

zelig.out2 <- zelig(matemp ~ gender + age + highedu + 
 supervise + selfemp + unemp + parttime + publicsec + retired + rural + 
nolab + lowclass + highclass,
data = mydata_r, model = "probit", weights = "weight")
summary(zelig.out2)

###Female gender
x.low<-setx(zelig.out2, gender=0)
x.high<-setx(zelig.out2, gender=1)
s.out1 <- sim(zelig.out2, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) 


############################
#### job is ok but women really want home and family (disagree)
############################

################ Look within L 

zelig.out <- zelig(job_ok ~ gender + age + highedu + 
 supervise + selfemp + unemp + parttime + publicsec + retired + rural + 
nolab + lowclass + highclass,
data = mydata_l, model = "probit", weights = "weight")
summary(zelig.out)

###Female gender
x.low<-setx(zelig.out, gender=0)
x.high<-setx(zelig.out, gender=1)
s.out1 <- sim(zelig.out, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) # 

################ Look within R

zelig.out2 <- zelig(job_ok  ~ gender + age + highedu + 
 supervise + selfemp + unemp + parttime + publicsec + retired + rural + 
nolab + lowclass + highclass,
data = mydata_r, model = "probit", weights = "weight")
summary(zelig.out2)

###Female gender
x.low<-setx(zelig.out2, gender=0)
x.high<-setx(zelig.out2, gender=1)
s.out1 <- sim(zelig.out2, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) 


############################
#### housewife fulfilling (disagree)
############################

################ Look within L 

zelig.out <- zelig(fulfill ~ gender + age + highedu + 
 supervise + selfemp + unemp + parttime + publicsec + retired + rural + 
nolab + lowclass + highclass,
data = mydata_l, model = "probit", weights = "weight")
summary(zelig.out)

###Female gender
x.low<-setx(zelig.out, gender=0)
x.high<-setx(zelig.out, gender=1)
s.out1 <- sim(zelig.out, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) # 

################ Look within R

zelig.out2 <- zelig(fulfill   ~ gender + age + highedu + 
 supervise + selfemp + unemp + parttime + publicsec + retired + rural + 
nolab + lowclass + highclass,
data = mydata_r, model = "probit", weights = "weight")
summary(zelig.out2)

###Female gender
x.low<-setx(zelig.out2, gender=0)
x.high<-setx(zelig.out2, gender=1)
s.out1 <- sim(zelig.out2, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) 


############################
#### man should work woman family 
############################

################ Look within L

zelig.out <- zelig(genderroles ~ gender + age + highedu + 
 supervise + selfemp + unemp + parttime + publicsec + retired + rural + 
nolab + lowclass + highclass,
data = mydata_l, model = "probit", weights = "weight")
summary(zelig.out)

###Female gender
x.low<-setx(zelig.out, gender=0)
x.high<-setx(zelig.out, gender=1)
s.out1 <- sim(zelig.out, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) # 

################ Look within R

zelig.out2 <- zelig(genderroles ~ gender + age + highedu + 
 supervise + selfemp + unemp + parttime + publicsec + retired + rural + 
nolab + lowclass + highclass,
data = mydata_r, model = "probit", weights = "weight")
summary(zelig.out2)

###Female gender
x.low<-setx(zelig.out2, gender=0)
x.high<-setx(zelig.out2, gender=1)
s.out1 <- sim(zelig.out2, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) 

###############################
###############################
### Now use the 2012 survey for question on paid leave 
###############################
###############################

library(foreign)
mydata2 <- read.dta("ZA5900_2.dta")
head(mydata2)
nrow(mydata2)

table(mydata2$V4)

### Drop non-OECD
mydata2 <- subset(mydata2, V4== "AU-Australia" | V4== "AT-Austria" | 
V4== "BE-Belgium" | V4== "CA-Canada" | 
V4== "DK-Denmark" | V4== "FR-France" | 
V4== "DE-Germany" | V4== "IE-Ireland" | 
V4== "JP-Japan" | V4== "NL-Netherlands" | 
V4== "PT-Portugal" | V4== "ES-Spain" | 
V4== "SE-Sweden" | V4== "CH-Switzerland" | 
V4== "GB-Great Britain and/or United Kingdom" | V4== "US-United States" |
V4== "NO-Norway" )

table(mydata2$V4) 

## weight is WEIGHT
mydata2$weight<-mydata2$WEIGHT

##gender is SEX, 1 is male, 2 is female
table(mydata2$SEX)
mydata2$gender<-NA
mydata2$gender[mydata2$SEX=="Male"]<-0
mydata2$gender[mydata2$SEX=="Female"]<-1
table(mydata2$gender)

##age
table(mydata2$AGE)
mydata2$age<-mydata2$AGE
mydata2$age[mydata2$AGE==999]<-NA
table(mydata2$age)
 
##highedu
table(mydata2$DEGREE)
mydata2$highedu<-0
mydata2$highedu[mydata2$DEGREE=="Lower level tertiary, first stage (also technical schools at a tertiary level)"]<-1
mydata2$highedu[mydata2$DEGREE=="Upper level tertiary (Master, Dr.)"]<-1
table(mydata2$highedu)

##
table(mydata2$PARTY_LR)
mydata2$lr<-NA
mydata2$lr[mydata2$PARTY_LR=="Right, conservative"]<-0
mydata2$lr[mydata2$PARTY_LR=="Far right (fascist etc.)"]<-0
mydata2$lr[mydata2$PARTY_LR=="Far left (communist etc.)"]<-1
mydata2$lr[mydata2$PARTY_LR=="Left, center left"]<-1
table(mydata2$lr)

##
table(mydata2$PARTY_LR)
mydata2$lr2<-NA
mydata2$lr2[mydata2$PARTY_LR=="Right, conservative"]<-0
mydata2$lr2[mydata2$PARTY_LR=="Left, center left"]<-1
table(mydata2$lr2)


##
table(mydata2$WRKSUP)
mydata2$supervise<-0
mydata2$supervise[mydata2$WRKSUP=="Yes"]<-1
table(mydata2$supervise)

##
table(mydata2$EMPREL)
mydata2$selfemp<-0
mydata2$selfemp[mydata2$EMPREL=="Self-employed without employees"]<-1
mydata2$selfemp[mydata2$EMPREL=="Self-employed with employees"]<-1
table(mydata2$selfemp)

table(mydata2$MAINSTAT)
mydata2$unemp<-0
mydata2$unemp[mydata2$MAINSTAT=="Unemployed and looking for a job, HR: incl never had a job"]<-1
table(mydata2$unemp)

mydata2$retired<-0
mydata2$retired[mydata2$MAINSTAT=="Retired"]<-1
table(mydata2$retired)

mydata2$nolab<-0
mydata2$nolab[mydata2$MAINSTAT=="Other"]<-1
table(mydata2$nolab)

table(mydata2$TYPORG2)
mydata2$publicsec<-0
mydata2$publicsec[mydata2$TYPORG2=="Public employer"]<-1
table(mydata2$publicsec)

table(mydata2$WRKHRS)
mydata2$parttime<-0
mydata2$parttime[mydata2$WRKHRS>1]<-1
mydata2$parttime[mydata2$WRKHRS>35]<-0
table(mydata2$parttime)

## do not include class or rural-urban bc don't include in ROG -- let's code it here anyway
## to see if it makes a difference to results 

table(mydata2$URBRURAL)
mydata2$rural<-0
mydata2$rural[mydata2$URBRURAL=="A country village"]<-1
mydata2$rural[mydata2$URBRURAL=="A farm or home in the country"]<-1
table(mydata2$rural)

table(mydata2$TOPBOT)
mydata2$lowclass<-0
mydata2$lowclass[mydata2$TOPBOT=="Lowest, Bottom, 01"]<-1
mydata2$lowclass[mydata2$TOPBOT=="02"]<-1
mydata2$lowclass[mydata2$TOPBOT=="03"]<-1
mydata2$lowclass[mydata2$TOPBOT=="04"]<-1
mydata2$lowclass[mydata2$TOPBOT=="Not available: GB,US"]<-NA
table(mydata2$lowclass)

mydata2$highclass<-0
mydata2$highclass[mydata2$TOPBOT=="Highest, Top, 10"]<-1
mydata2$highclass[mydata2$TOPBOT=="09"]<-1
mydata2$highclass[mydata2$TOPBOT=="09"]<-1
mydata2$highclass[mydata2$TOPBOT=="07"]<-1
mydata2$highclass[mydata2$TOPBOT=="Not available: GB,US"]<-NA
table(mydata2$highclass)

####Other DVs: 

### Workg mom: warm relation child ok
table(mydata2$V4)
mydata2$working_warm<-0


## 

##paid leave 
table(mydata2$V28)
mydata2$paid_leave<-0
mydata2$paid_leave[mydata2$V28==6]<-1
mydata2$paid_leave[mydata2$V28==7]<-1
mydata2$paid_leave[mydata2$V28==8]<-1
mydata2$paid_leave[mydata2$V28==9]<-1
mydata2$paid_leave[mydata2$V28==10]<-1
mydata2$paid_leave[mydata2$V28==11]<-1
mydata2$paid_leave[mydata2$V28==12]<-1
table(mydata2$paid_leave)

table(mydata2$V28)
mydata2$paid_leave12<-0
mydata2$paid_leave12[mydata2$V28==1]<-1
mydata2$paid_leave12[mydata2$V28==2]<-1
mydata2$paid_leave12[mydata2$V28==3]<-1
mydata2$paid_leave12[mydata2$V28==4]<-1
mydata2$paid_leave12[mydata2$V28==5]<-1
mydata2$paid_leave12[mydata2$V28==6]<-1
mydata2$paid_leave12[mydata2$V28==7]<-1
mydata2$paid_leave12[mydata2$V28==8]<-1
mydata2$paid_leave12[mydata2$V28==9]<-1
mydata2$paid_leave12[mydata2$V28==10]<-1
mydata2$paid_leave12[mydata2$V28==11]<-1
mydata2$paid_leave12[mydata2$V28==12]<-1
table(mydata2$paid_leave12)

table(mydata2$V28)
mydata2$paid_leave3<-0
mydata2$paid_leave3[mydata2$V28==4]<-1
mydata2$paid_leave3[mydata2$V28==5]<-1
mydata2$paid_leave3[mydata2$V28==6]<-1
mydata2$paid_leave3[mydata2$V28==7]<-1
mydata2$paid_leave3[mydata2$V28==8]<-1
mydata2$paid_leave3[mydata2$V28==9]<-1
mydata2$paid_leave3[mydata2$V28==10]<-1
mydata2$paid_leave3[mydata2$V28==11]<-1
mydata2$paid_leave3[mydata2$V28==12]<-1
table(mydata2$paid_leave3)
### a variable coded leave from 4 to 12 months. 

##paid leave 
table(mydata2$V28)
mydata2$paid_leave4<-0
mydata2$paid_leave4[mydata2$V28==6]<-1
mydata2$paid_leave4[mydata2$V28==7]<-1
mydata2$paid_leave4[mydata2$V28==8]<-1
mydata2$paid_leave4[mydata2$V28==9]<-1
mydata2$paid_leave4[mydata2$V28==10]<-1
mydata2$paid_leave4[mydata2$V28==11]<-1
mydata2$paid_leave4[mydata2$V28==12]<-1
mydata2$paid_leave4[mydata2$V28==98]<-NA
mydata2$paid_leave4[mydata2$V28==99]<-NA
table(mydata2$paid_leave4)
##  take out 98 and 99... 

### paid leave split
table(mydata2$V30)
mydata2$paid_split<-0
mydata2$paid_split[mydata2$V30=="Mother and father half"]<-1
table(mydata2$paid_split)

######## Construct datasets for L and R parties 
mydata2_l <- subset(mydata2, lr == 1 )
mydata2_r <- subset(mydata2, lr == 0 )
table(mydata2$PARTY_LR)
table(mydata2_l$PARTY_LR)
table(mydata2_r$PARTY_LR)

###################################
###################################

## Paid leave 

######################
######################
## Look within L
######################
######################

zelig.out <- zelig(paid_leave ~ gender + age + highedu +
 supervise + selfemp + unemp + parttime + publicsec + retired + 
nolab + highclass + lowclass + rural ,
data = mydata2_l, model = "probit", weights = "weight")
summary(zelig.out)

###Female gender
x.low<-setx(zelig.out, gender=0)
x.high<-setx(zelig.out, gender=1)
s.out1 <- sim(zelig.out, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) # there is a 4% gap and it is sign.

######################
######################
## Look within R
######################
######################

zelig.out <- zelig(paid_leave ~ gender + age + highedu +
 supervise + selfemp + unemp + parttime + publicsec + retired + 
nolab + highclass + lowclass + rural ,
data = mydata2_r, model = "probit", weights = "weight")
summary(zelig.out)

###Female gender
x.low<-setx(zelig.out, gender=0)
x.high<-setx(zelig.out, gender=1)
s.out1 <- sim(zelig.out, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) # there is a 5% gap and it is sig


###################################
###################################

## Paid leave split

######################
######################
## Look within L
######################
######################

zelig.out <- zelig(paid_split ~ gender + age + highedu +
 supervise + selfemp + unemp + parttime + publicsec + retired + 
nolab + highclass + lowclass + rural ,
data = mydata2_l, model = "probit", weights = "weight")
summary(zelig.out)

###Female gender
x.low<-setx(zelig.out, gender=0)
x.high<-setx(zelig.out, gender=1)
s.out1 <- sim(zelig.out, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) 


######################
######################
## Look within R
######################
######################

zelig.out <- zelig(paid_split ~ gender + age + highedu +
 supervise + selfemp + unemp + parttime + publicsec + retired + 
nolab + highclass + lowclass + rural ,
data = mydata2_r, model = "probit", weights = "weight")
summary(zelig.out)

###Female gender
x.low<-setx(zelig.out, gender=0)
x.high<-setx(zelig.out, gender=1)
s.out1 <- sim(zelig.out, x = x.low, x1 = x.high, param=NULL)
summary(s.out1) 

#### Now make a plot 

mydata4 = read.csv("within_party3.csv")
head(mydata4)

levels(mydata4$Party)
table(mydata4$Party)
mydata4$Party2<-NA
mydata4$Party2[mydata4$Party=="L"]<-"Left"
mydata4$Party2[mydata4$Party=="R"]<-"Right"

mydata4$Party2 <- factor(mydata4$Party2, levels = c("Right", "Left"))

mydata4$Party<-NULL
mydata4$Party<-mydata4$Party2 

library(ggplot2)
library(gridExtra)

tiff("Fig3_4_v2.tiff", width = 10, height = 5, units = 'in', res = 1200)

bp<-ggplot(mydata4, aes( x = factor(Model), y = Coefficient, group=Party, color=Party)) +
  geom_point(size = 3)+ geom_hline(yintercept = 0) +
  geom_errorbar(aes(ymax = UB, ymin = LB), size=1, width=0) + 
  xlab("") + ylab("")  + scale_x_discrete(labels= mydata4$Q) + scale_y_continuous(breaks = seq(-.05, .2, by = .02)) +
  stat_summary(fun.y="mean", geom="point", shape=23, size=3, fill="black") +
  geom_hline(aes(yintercept=0), colour="black", linetype="solid") + theme_classic() +
  theme(plot.title = element_text(size = 20, face = "bold", vjust = 1.5),
        axis.title.y = element_text(colour="grey20",size=14,face="bold"),
        axis.text.x = element_text(colour="grey20",size=12,face="bold"),
        axis.text.y = element_text(colour="grey20",size=12,face="bold"),  
        axis.title.x = element_text(colour="grey20",size=14,face="bold"), 
        axis.ticks.y = element_blank())

bp + coord_flip() + scale_colour_grey() 
dev.off()

############################ PDF  

pdf("Fig3_4_v2.pdf", width = 10, height = 5)

bp<-ggplot(mydata4, aes( x = factor(Model), y = Coefficient, group=Party, color=Party)) +
  geom_point(size = 3)+ geom_hline(yintercept = 0) +
  geom_errorbar(aes(ymax = UB, ymin = LB), size=1, width=0) + 
  xlab("") + ylab("")  + scale_x_discrete(labels= mydata4$Q) + scale_y_continuous(breaks = seq(-.05, .2, by = .02)) +
  stat_summary(fun.y="mean", geom="point", shape=23, size=3, fill="black") +
  geom_hline(aes(yintercept=0), colour="black", linetype="solid") + theme_classic() +
  theme(plot.title = element_text(size = 20, face = "bold", vjust = 1.5),
        axis.title.y = element_text(colour="grey20",size=14,face="bold"),
        axis.text.x = element_text(colour="grey20",size=12,face="bold"),
        axis.text.y = element_text(colour="grey20",size=12,face="bold"),  
        axis.title.x = element_text(colour="grey20",size=14,face="bold"), 
        axis.ticks.y = element_blank())

bp + coord_flip() + scale_colour_grey() 
dev.off()

###########################
###########################
### Chapter 4 #############
###########################
###########################

load("matched_data_0717.RData") ##data is dat2
head(dat2)

## make party quota whole number not ratio, like women in parliament, flfp etc  

dat2$lag1.partyquota<-dat2$lag1.partyquota*100


#################
#### Figure 1, and models to produce data for Fig 1 
#################

fm.1<-lm(per503 ~ factor(party) + factor(year) + lag1.quota + lag1.partyquota  + lag1.fplabfo + lag1.pervote + lag1.effpar_leg + log(lag1.rgdpecap), data=dat2)
summary(fm.1) 
fm.2<-lm(per504 ~ factor(party) + factor(year) + lag1.quota + lag1.partyquota  + lag1.fplabfo + lag1.pervote + lag1.effpar_leg + log(lag1.rgdpecap), data=dat2)
summary(fm.2)  

#################

##cluster SE function##

clx <- 
function(fm, dfcw, cluster){
         library(sandwich);library(lmtest)
         M <- length(unique(cluster))   
         N <- length(cluster)           
         K <- fm$rank                        
         dfc <- (M/(M-1))*((N-1)/(N-K))  
         uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
         vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)*dfcw
	return(list(coeftest= coeftest(fm, vcovCL), vcov=vcovCL))
 }

###############
#### Table A4.3 Model 1

C1<- clx(fm.1, 1, dat2$date)
std.err<-sqrt(diag(C1$vcov))
r.est<-cbind(estimate=fm.1$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.1$coefficients/std.err))),5), 
       lower=fm.1$coefficients-1.96*std.err, 
       upper=fm.1$coefficients+1.96*std.err)
       
r.est

##### Table A4.3 Model 2

C2<- clx(fm.2, 1, dat2$date)
std.err2<-sqrt(diag(C2$vcov))
r.est2<-cbind(estimate=fm.2$coefficients, std.err2, 
       pvalue=round(2*(1-pnorm(abs(fm.2$coefficients/std.err2))),5), 
       lower=fm.2$coefficients-1.96*std.err2, 
       upper=fm.2$coefficients+1.96*std.err2)
       
r.est2


#################
## Create Figure 4.1

mydata3 = read.csv("data_for_figure_4_1.csv")
head(mydata3)

library(ggplot2)
library(gridExtra)

tiff("Fig4_1.tiff", width = 10, height = 5, units = 'in', res = 1200)

bp<-ggplot(mydata3, aes(colour=factor(model), x = factor(model), y = q_coef)) +
  geom_point(size = 3)+ geom_hline(yintercept = 0) +
  geom_errorbar(aes(ymax = ub, ymin = lb), size=1, width=.3)  +
 scale_colour_grey(end = 0) + ylab("Coefficient (95% CIs)") + xlab("") +  theme_classic() + theme(legend.position="none") + theme(text = element_text(size=12))


bp2<- bp + scale_x_discrete(labels=c("social" = "Equality", "welfare" = "Welfare State Expansion",
                              "lr" = "Left-Right Position"))
bp2

bp3<- bp2 + coord_flip(ylim = c(-5,10)) + theme(legend.position="none") + theme(text = element_text(size=20)) + theme(
axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'))
bp3
dev.off()


#################### PDF 

pdf("Fig4_1.pdf", width = 10, height = 5)

bp<-ggplot(mydata3, aes(colour=factor(model), x = factor(model), y = q_coef)) +
  geom_point(size = 3)+ geom_hline(yintercept = 0) +
  geom_errorbar(aes(ymax = ub, ymin = lb), size=1, width=.3)  +
 scale_colour_grey(end = 0) + ylab("Coefficient (95% CIs)") + xlab("") +  theme_classic() + theme(legend.position="none") + theme(text = element_text(size=12))


bp2<- bp + scale_x_discrete(labels=c("social" = "Equality", "welfare" = "Welfare State Expansion",
                              "lr" = "Left-Right Position"))+
coord_flip(ylim = c(-5,10)) + theme(legend.position="none") + theme(text = element_text(size=20)) + theme(
axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'))
bp2
dev.off()

############################
###Table A4.2: Summary statistics, matched data 
library(pastecs)
library(stargazer)
stat.desc(dat2)
stargazer(dat2)

##############################
### Table A4.3 Models 3 and 4 


fm.15<-lm(lag1.pfem_new ~ factor(party) + factor(year) + lag1.quota + lag1.partyquota + lag1.fplabfo + lag1.pervote + lag1.effpar_leg + log(lag1.rgdpecap), data=dat2)
summary(fm.15) 

fm.16<-lm(per503 ~ factor(party) + factor(year) + lag1.quota + lag1.partyquota + lag1.pfem_new + lag1.fplabfo + lag1.pervote + lag1.effpar_leg + log(lag1.rgdpecap), data=dat2)
summary(fm.16) 

C16<- clx(fm.16, 1, dat2$date)
std.err16<-sqrt(diag(C16$vcov))
r.est16<-cbind(estimate=fm.16$coefficients, std.err16, 
       pvalue=round(2*(1-pnorm(abs(fm.16$coefficients/std.err16))),5), 
       lower=fm.16$coefficients-1.96*std.err16, 
       upper=fm.16$coefficients+1.96*std.err16)
       
r.est16


##############################
### Table A4.5 Models 1 and 2

options(scipen=999)


fm.18b<-lm(per503 ~ factor(party) + factor(year) + lag1.quota_shock + lag1.partyquota  + lag1.fplabfo + lag1.pervote + lag1.effpar_leg + log(lag1.rgdpecap), data=dat2)
summary(fm.18b) ##

C16<- clx(fm.18b, 1, dat2$date)
std.err16<-sqrt(diag(C16$vcov))
r.est16<-cbind(estimate=fm.18b$coefficients, std.err16, 
       pvalue=round(2*(1-pnorm(abs(fm.18b$coefficients/std.err16))),5), 
       lower=fm.18b$coefficients-1.96*std.err16, 
       upper=fm.18b$coefficients+1.96*std.err16)
       
r.est16

fm.2b<-lm(per504 ~ factor(party) + factor(year) + lag1.quota_shock + lag1.partyquota  + lag1.fplabfo + lag1.pervote + lag1.effpar_leg + log(lag1.rgdpecap), data=dat2)
summary(fm.2b) ##results are the same when no controls are included 

C16<- clx(fm.2b, 1, dat2$date)
std.err16<-sqrt(diag(C16$vcov))
r.est16<-cbind(estimate=fm.2b$coefficients, std.err16, 
       pvalue=round(2*(1-pnorm(abs(fm.2b$coefficients/std.err16))),5), 
       lower=fm.2b$coefficients-1.96*std.err16, 
       upper=fm.2b$coefficients+1.96*std.err16)
       
r.est16

############################
##Table A4.5 Model 3: lags and leads

fm.17<-lm(per503 ~ factor(party) + factor(year) + quota_ey3_v2 + quota_ey2_v2 + quota_ey1_v2 + quota_lead1_v2 + quota_lead2_v2 + quota_lead3_v2  
+ lag1.partyquota  + lag1.fplabfo + lag1.pervote + lag1.effpar_leg + log(lag1.rgdpecap), data=dat2)
summary(fm.17) ##
C1<- clx(fm.17, 1, dat2$date)
std.err<-sqrt(diag(C1$vcov))
r.est<-cbind(estimate=fm.17$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.17$coefficients/std.err))),5), 
       lower=fm.17$coefficients-1.96*std.err, 
       upper=fm.17$coefficients+1.96*std.err)
       
r.est

#################
## Table A4.5 Model 4: Party specific time trends

fm.111<-lm(per503 ~ factor(party)*year + factor(year) + lag1.quota + lag1.partyquota  + lag1.fplabfo + lag1.pervote + lag1.effpar_leg + log(lag1.rgdpecap), data=dat2)
summary(fm.111) 

C3<- clx(fm.111, 1, dat2$date)
std.err3<-sqrt(diag(C3$vcov))
r.est3<-cbind(estimate=fm.111$coefficients, std.err3, 
       pvalue=round(2*(1-pnorm(abs(fm.111$coefficients/std.err3))),5), 
       lower=fm.111$coefficients-1.96*std.err3, 
       upper=fm.111$coefficients+1.96*std.err3)
       
r.est3 

#############################################
############# Analysis of hand-coded data ###
############# on party attention to #########
############# work-family policies ##########

cmp_wf2 = read.csv("Temp_WF2_mar21.csv")


##cluster SE function##

clx <- 
function(fm, dfcw, cluster){
         library(sandwich);library(lmtest)
         M <- length(unique(cluster))   
         N <- length(cluster)           
         K <- fm$rank                        
         dfc <- (M/(M-1))*((N-1)/(N-K))  
         uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
         vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)*dfcw
	return(list(coeftest= coeftest(fm, vcovCL), vcov=vcovCL))
 }


## Original coding of DVs from hand coded data 
## par2$care<-par2$pct105 + par2$pct105_5 + par2$pct106
## par2$allowance<-par2$pct107 + par2$pct108
## par2$eqleave <-(par2parental + par2$pat_leave) - par2$mat_leave
## where parental is 102, paternity is 104 and maternity is 103

library(lmtest)

library(ggplot2)
library(foreign)
library(plyr)
library(dplyr)
library(xtable)
library(TeachingDemos)
library(reporttools)
library(gridExtra)
library(vcd)

library(haven)
library(multiwayvcov)
library(clubSandwich)
library(lmtest)
library(lfe)


### make DVs into whole numbers

cmp_wf2$allowance<-cmp_wf2$allowance*100
cmp_wf2$care<-cmp_wf2$care*100
cmp_wf2$eqleave<-cmp_wf2$eqleave*100

## same for flfp 

cmp_wf2$lag1.fplabfo<-cmp_wf2$lag1.fplabfo*100

## and party quota 

cmp_wf2$lag1.partyquota<-cmp_wf2$lag1.partyquota*100

############ Table A4.2

library(stargazer)
stargazer(cmp_wf2)

############ Table A4.4
##########################

set<-c("allowance", "care", "eqleave", "party", "year", "lag1.quota", 
"lag1.partyquota", "lag1.fplabfo", "lag1.rgdpecap", "lag1.fert2", "lag1.EUmem",
"date")

blah<-na.omit(cmp_wf2[,set])
nrow(blah) ##114

fm.3a<-lm(allowance ~ lag1.quota + lag1.partyquota + lag1.fplabfo  + log(lag1.rgdpecap)+ lag1.fert2 + lag1.EUmem + factor(party) + factor(year), data=blah)
summary(fm.3a) 

vcov1= cluster.vcov(fm.3a, ~blah$date)
r.est3a<-coeftest(fm.3a, vcov1) 
r.est3a

### Now for the CIs
C16<- clx(fm.3a, 1, blah$date)
std.err16<-sqrt(diag(C16$vcov))
r.est16<-cbind(estimate=fm.3a$coefficients, std.err16, 
       pvalue=round(2*(1-pnorm(abs(fm.3a$coefficients/std.err16))),5), 
       lower=fm.3a$coefficients-1.96*std.err16, 
       upper=fm.3a$coefficients+1.96*std.err16)
       
r.est16

fm.3b<-lm(care ~ lag1.quota  + lag1.partyquota + lag1.fplabfo  + log(lag1.rgdpecap)+ lag1.fert2 + lag1.EUmem  + factor(party) + factor(year), data=blah)
summary(fm.3b) ### this is positive and sig

vcov1= cluster.vcov(fm.3b, ~blah$date)
r.est3b<-coeftest(fm.3b, vcov1) 
r.est3b

### Now for the CIs
C16<- clx(fm.3b, 1, blah$date)
std.err16<-sqrt(diag(C16$vcov))
r.est16<-cbind(estimate=fm.3b$coefficients, std.err16, 
       pvalue=round(2*(1-pnorm(abs(fm.3b$coefficients/std.err16))),5), 
       lower=fm.3b$coefficients-1.96*std.err16, 
       upper=fm.3b$coefficients+1.96*std.err16)
       
r.est16


fm.3c<-lm(eqleave ~ lag1.quota + lag1.partyquota + lag1.fplabfo  + log(lag1.rgdpecap)+ lag1.fert2 + lag1.EUmem + factor(party) + factor(year), data=blah)
summary(fm.3c) 

vcov1= cluster.vcov(fm.3c, ~blah$date)
r.est3c<-coeftest(fm.3c, vcov1) ## this is pos and sig // still holds with party FE used
r.est3c


### Now for the CIs
C16<- clx(fm.3c, 1, blah$date)
std.err16<-sqrt(diag(C16$vcov))
r.est16<-cbind(estimate=fm.3c$coefficients, std.err16, 
       pvalue=round(2*(1-pnorm(abs(fm.3c$coefficients/std.err16))),5), 
       lower=fm.3c$coefficients-1.96*std.err16, 
       upper=fm.3c$coefficients+1.96*std.err16)
       
r.est16

library(stargazer)

stargazer(fm.3a, fm.3b, fm.3c, omit = c("factor\\(party", "factor\\(year"), title="Regression Results",
se = list(r.est3a[,2], 
r.est3b[,2],
r.est3c[,2]),
align=TRUE, dep.var.labels=c("Family Allowances", "Child Care", "Gender Equality Leave"),
order=c("lag1.quota", "lag1.partyquota", "lag1.fplabfo", "lag1.rgdpecap", "lag1.fert2", "lag1.EUmem"),
 covariate.labels = c("Quota Law_{(t-1)}", "Party Quota_{(t-1)}", " $\female$ Labor Force Part._{(t-1)}",
                            "Log(GDP per capita_{(t-1)})", "Fertility Rate_{(t-1)}", "EU Membership_{(t-1)}"),
omit.stat=c("LL","ser","f"), no.space=TRUE,
add.lines = list(c("Year fixed effects", "Yes", "Yes"),
                           c("Party fixed effects", "Yes", "Yes")))


##### Now create a figure of main results for text 

mydata4z = read.csv("data_for_figure_4_2.csv")
head(mydata4z)

tiff("Fig4_2.tiff", width = 10, height = 5, units = 'in', res = 1200)

bp<-ggplot(mydata4z, aes(colour=factor(model), x = factor(model), y = q_coef)) +
  geom_point(size = 3)+ geom_hline(yintercept = 0) +
  geom_errorbar(aes(ymax = ub, ymin = lb), size=1, width=.3) + 
  scale_colour_grey(end = 0)  + ylab("Coefficient (95% CIs)") + xlab("") +  theme_classic() + theme(legend.position="none") + theme(text = element_text(size=20))


bp2<- bp + scale_x_discrete(labels=c("allowance" = "Family Allowances", "care" = "Child Care", "eqleave" = "Gender Equality Leave"))
bp2

bp3<- bp2 + coord_flip(ylim = c(-20,40))+ theme_classic()+ theme(legend.position="none") + theme(text = element_text(size=20)) + theme(
axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'))
bp3 
dev.off()

############################ PDF 

mydata4z = read.csv("data_for_presentation_charts2z.csv")
head(mydata4z)

pdf("Fig4_2.pdf", width = 10, height = 5)

bp<-ggplot(mydata4z, aes(colour=factor(model), x = factor(model), y = q_coef)) +
  geom_point(size = 3)+ geom_hline(yintercept = 0) +
  geom_errorbar(aes(ymax = ub, ymin = lb), size=1, width=.3) + 
  scale_colour_grey(end = 0)  + ylab("Coefficient (95% CIs)") + xlab("") +  theme_classic() + theme(legend.position="none") + theme(text = element_text(size=20))


bp2<- bp + scale_x_discrete(labels=c("allowance" = "Family Allowances", "care" = "Child Care", "eqleave" = "Gender Equality Leave"))+
 coord_flip(ylim = c(-20,40))+ theme_classic()+ theme(legend.position="none") + theme(text = element_text(size=20)) + theme(
axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'))
bp2
dev.off()



############ Table A4.6

### No covariates  

fm.3a<-lm(allowance ~ lag1.quota + factor(party) + factor(year), data=cmp_wf2)
summary(fm.3a) 

vcov1= cluster.vcov(fm.3a, ~cmp_wf2$date)
r.est3a<-coeftest(fm.3a, vcov1) 
r.est3a

fm.3b<-lm(care ~ lag1.quota  + factor(party) + factor(year), data=cmp_wf2)
summary(fm.3b) 

vcov1= cluster.vcov(fm.3b, ~cmp_wf2$date)
r.est3b<-coeftest(fm.3b, vcov1) 
r.est3b

fm.3c<-lm(eqleave ~ lag1.quota + factor(party) + factor(year), data=cmp_wf2)
summary(fm.3c) 

vcov1= cluster.vcov(fm.3c, ~cmp_wf2$date)
r.est3c<-coeftest(fm.3c, vcov1)
r.est3c

library(stargazer)

stargazer(fm.3a, fm.3b, fm.3c, omit = c("factor\\(party", "factor\\(year"), title="Regression Results",
se = list(r.est3a[,2], 
r.est3b[,2],
r.est3c[,2]),
align=TRUE, dep.var.labels=c("Family Allowances", "Child Care", "Gender Equality Leave"),
#order=c("lag1.quota", "lag1.partyquota", "lag1.fplabfo", "lag1.rgdpecap", "lag1.fert2", "lag1.EUmem"),
 covariate.labels = c("Quota Law_{(t-1)}"),
omit.stat=c("LL","ser","f"), no.space=TRUE,
add.lines = list(c("Year fixed effects", "Yes", "Yes"),
                           c("Party fixed effects", "Yes", "Yes")))


###########################
###########################
### Chapter 5 #############
###########################
###########################

load("merged_oecd_cpds.RData")
head(test)
library(multiwayvcov)
library(lmtest)

### measure of gender equality leave
##test$gender_leave_wks<- NA
##test$gender_leave_wks<- (test$paid_parleave_total_wks + test$paid_daddy_leave_wks)- test$matleave_wks
##head(test)
##summary(test$gender_leave_wks)

### Look at correlation between leave policies 

cor(test$matleave_wks, test$paid_parleave_total_wks, use="complete.obs")
## 0.1088825
cor(test$matleave_wks, test$paid_daddy_leave_wks, use="complete.obs")
## -0.05910989
cor(test$paid_parleave_total_wks, test$paid_daddy_leave_wks, use="complete.obs")
## 0.2367031

## change partyquota variable to whole number, as in the other variables 

test$ll.share_partyquota<-test$ll.share_partyquota*100


############################
############################
#### figure for leave policy over time, spending over time 
#### Replicates Fig 5.1, 5.2
############################
############################

library(ggplot2)
library(foreign)
library(plyr)
library(dplyr)
library(xtable)
library(TeachingDemos)
library(reporttools)
library(gridExtra)
library(vcd)

vars<-c("countryname", "year", "matleave_wks", "quota")
br<-test[vars]
br$Year<-br$year

library(ggplot2)
library(directlabels)
library(dplyr)


#### Family allowances 

mean_data <- group_by(test, year) %>%
             summarise(allowance = mean(allowance, na.rm = TRUE))

tiff("Fig5_1c.tiff", width = 5, height = 5, units = 'in', res = 1200)
ggplot(na.omit(mean_data), aes(x = year, y = allowance)) +
  geom_point() + geom_line() +  
  scale_colour_grey(start = 0, end = .2) +
theme_bw() + geom_line(size = 1) +
     theme(legend.position = "none") +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15)) +
    scale_y_continuous("Spending on Family Allowances (% GDP)") +  scale_x_continuous("", breaks=seq(1980,2013,5), limits=c(1980,2013))
dev.off()

############## PDF


pdf("Fig5_1c.pdf", width = 5, height = 5)
ggplot(na.omit(mean_data), aes(x = year, y = allowance)) +
  geom_point() + geom_line() +  
  scale_colour_grey(start = 0, end = .2) +
theme_bw() + geom_line(size = 1) +
     theme(legend.position = "none") +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15)) +
    scale_y_continuous("Spending on Family Allowances (% GDP)") +  scale_x_continuous("", breaks=seq(1980,2013,5), limits=c(1980,2013))
dev.off()


#### Leave spending

mean_data <- group_by(test, year) %>%
             summarise(leave = mean(leave, na.rm = TRUE))

tiff("Fig5_1d.tiff", width = 5, height = 5, units = 'in', res = 1200)
ggplot(na.omit(mean_data), aes(x = year, y = leave)) +
  geom_point() + geom_line() +  
  scale_colour_grey(start = 0, end = .2) +
theme_bw() + geom_line(size = 1) +
     theme(legend.position = "none") +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15)) +
    scale_y_continuous("Spending on Leave Policies (% GDP)") +  scale_x_continuous("",breaks=seq(1980,2013,5), limits=c(1980,2013))
dev.off()

#################################### PDF 

pdf("Fig5_1d.pdf", width = 5, height = 5)
ggplot(na.omit(mean_data), aes(x = year, y = leave)) +
  geom_point() + geom_line() +  
  scale_colour_grey(start = 0, end = .2) +
theme_bw() + geom_line(size = 1) +
     theme(legend.position = "none") +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15)) +
    scale_y_continuous("Spending on Leave Policies (% GDP)") +  scale_x_continuous("",breaks=seq(1980,2013,5), limits=c(1980,2013))
dev.off()


### All family policy spending 

mean_data <- group_by(test, year) %>%
             summarise(total_all = mean(total_all, na.rm = TRUE))

tiff("Fig5_1b.tiff", width = 5, height = 5, units = 'in', res = 1200)
ggplot(na.omit(mean_data), aes(x = year, y = total_all)) +
  geom_point() + geom_line() +  
  scale_colour_grey(start = 0, end = .2) +
theme_bw() + geom_line(size = 1) +
     theme(legend.position = "none") +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15)) +
    scale_y_continuous("Spending on Family Policies (% GDP)") +  scale_x_continuous("",breaks=seq(1980,2013,5), limits=c(1980,2013))
dev.off()

######################################## PDF 

pdf("Fig5_1b.pdf", width = 5, height = 5)
ggplot(na.omit(mean_data), aes(x = year, y = total_all)) +
  geom_point() + geom_line() +  
  scale_colour_grey(start = 0, end = .2) +
theme_bw() + geom_line(size = 1) +
     theme(legend.position = "none") +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15)) +
    scale_y_continuous("Spending on Family Policies (% GDP)") +  scale_x_continuous("",breaks=seq(1980,2013,5), limits=c(1980,2013))
dev.off()

#### All social policy spending

mean_data <- group_by(test, year) %>%
             summarise(socexp_t_pmp = mean(socexp_t_pmp, na.rm = TRUE))

tiff("Fig5_1a.tiff", width = 5, height = 5, units = 'in', res = 1200)

ggplot(na.omit(mean_data), aes(x = year, y = socexp_t_pmp)) +
  geom_point() + geom_line() +  
  scale_colour_grey(start = 0, end = .2) +
theme_bw() + geom_line(size = 1) +
     theme(legend.position = "none") +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15)) +
    scale_y_continuous("Overall Social Spending (% GDP)") +  scale_x_continuous("",breaks=seq(1980,2013,5), limits=c(1980,2013))
dev.off()

################################# PDF 

pdf("Fig5_1a.pdf", width = 5, height = 5)
ggplot(na.omit(mean_data), aes(x = year, y = socexp_t_pmp)) +
  geom_point() + geom_line() +  
  scale_colour_grey(start = 0, end = .2) +
theme_bw() + geom_line(size = 1) +
     theme(legend.position = "none") +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15)) +
    scale_y_continuous("Overall Social Spending (% GDP)") +  scale_x_continuous("",breaks=seq(1980,2013,5), limits=c(1980,2013))
dev.off()


### Gender equality leave 

mean_data <- group_by(test, year) %>%
             summarise(gender_leave_wks = mean(gender_leave_wks, na.rm = TRUE))

tiff("Fig5_2.tiff", width = 5, height = 5, units = 'in', res = 1200)

library(ggplot2)
ggplot(na.omit(mean_data), aes(x = year, y = gender_leave_wks)) +
  geom_point() + geom_line() +  
  scale_colour_grey(start = 0, end = .2) +
theme_bw() + geom_line(size = 1) +
     theme(legend.position = "none") +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15)) +
    scale_y_continuous("Gender Equality Leave (Weeks)") +  scale_x_continuous("",breaks=seq(1990,2020,5), limits=c(1990,2016))
dev.off()

##################################### PDF 

pdf("Fig5_2.pdf", width = 5, height = 5)

library(ggplot2)
ggplot(na.omit(mean_data), aes(x = year, y = gender_leave_wks)) +
  geom_point() + geom_line() +  
  scale_colour_grey(start = 0, end = .2) +
theme_bw() + geom_line(size = 1) +
     theme(legend.position = "none") +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15)) +
    scale_y_continuous("Gender Equality Leave (Weeks)") +  scale_x_continuous("",breaks=seq(1990,2020,5), limits=c(1990,2016))
dev.off()

##########################
## Summary stats #########
## Table A5.1 ############
##########################

## See summary statistics for all variables, divided into different data sets (to match N in regression models)
## spending 

vars<-c("allowance", "total_all",  "lag1.quota", "ll.log_gdppc", "ll.flfp", "ll.gov_left1",
"ll.share_partyquota", "ll.fert2", "ll.EUmem",  "socexp_t_pmp", "oldage_pmp")
stats<-na.omit(test[,vars])
stargazer(stats)

## leave spending (690 obs)

vars<-c("leave", "lag1.quota", "ll.log_gdppc", "ll.flfp", "ll.gov_left1",
"ll.share_partyquota", "ll.fert2", "ll.EUmem")
stats<-na.omit(test[,vars])
stargazer(stats)

## Summary stats for leave length data DVs

vars<-c("ll.womenpar", "ll.log_gdppc", "ll.flfp",  "ll.gov_left1", 
"ll.share_partyquota", "ll.fert2", "ll.EUmem", "lag1.quota", "lag1.quota_adopt", "lag1.quota_shock", "lag1.quota_shift", 
 "countryname", "paid_daddy_leave_wks",
"matleave_wks", "parental_protected_wks", "paid_parleave_total_wks", "gender_leave_wks")
 
check<-na.omit(test[,vars])
stargazer(check)

### And for education spending DV

vars<-c("lag1.quota", "ll.log_gdppc", "ll.flfp", "ll.gov_left1",
"ll.share_partyquota", "ll.fert2", "ll.EUmem", "edu_lesspreprim")
stats<-na.omit(test[,vars])
stargazer(stats)

## and health care spending DV

vars<-c("lag1.quota", "ll.log_gdppc", "ll.flfp", "ll.gov_left1",
"ll.share_partyquota", "ll.fert2", "ll.EUmem", "health_pmp")
stats<-na.omit(test[,vars])
stargazer(stats)

########################
## Table on public spending ###
## Replicates Fig 5.3, Table A5.2
########################

fm.5<- lm(allowance ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test)
summary(fm.5)
vcov_country <- cluster.vcov(fm.5, test$countryname)
coeftest(fm.5, vcov_country) 


fm.6<- lm(leave ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test)
summary(fm.6)
vcov_country <- cluster.vcov(fm.6, test$countryname)
coeftest(fm.6, vcov_country) ## positive but not sig 

fm.7<- lm(total_all ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test)
summary(fm.7)
vcov_country <- cluster.vcov(fm.7, test$countryname)
coeftest(fm.7, vcov_country) 

########################
## Figure 5.3 
########################
 
library(ggplot2)
library(gridExtra)

mydata3 = read.csv("for_figure_5_3.csv")
head(mydata3)

tiff("Fig5_3.tiff", width = 10, height = 5, units = 'in', res = 1200)

bp<-ggplot(mydata3, aes(colour=factor(model), x = factor(model), y = q_coef)) +
  geom_point(size = 3)+ geom_hline(yintercept = 0) +
  geom_errorbar(aes(ymax = ub, ymin = lb), size=1, width=.3)  +
 scale_colour_grey(end = 0) + ylab("Coefficient (95% CIs)") + xlab("") +  theme_classic() + theme(legend.position="none") + theme(text = element_text(size=12))


bp2<- bp + scale_x_discrete(labels=c("allowance" = "Allowances", "leave" = "Leave",
                              "overall" = "Overall family"))
bp2
bp3<- bp2 + coord_flip(ylim = c(-1,0.5)) + theme(legend.position="none") + theme(text = element_text(size=20)) + theme(
axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'))
bp3
dev.off()

###################################### PDF 

pdf("Fig5_3.pdf", width = 10, height = 5)

bp<-ggplot(mydata3, aes(colour=factor(model), x = factor(model), y = q_coef)) +
  geom_point(size = 3)+ geom_hline(yintercept = 0) +
  geom_errorbar(aes(ymax = ub, ymin = lb), size=1, width=.3)  +
 scale_colour_grey(end = 0) + ylab("Coefficient (95% CIs)") + xlab("") +  theme_classic() + theme(legend.position="none") + theme(text = element_text(size=12))


bp2<- bp + scale_x_discrete(labels=c("allowance" = "Allowances", "leave" = "Leave",
                              "overall" = "Overall family"))+
 coord_flip(ylim = c(-1,0.5)) + theme(legend.position="none") + theme(text = element_text(size=20)) + theme(
axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'))
bp2
dev.off()

#######################
#### Table on Leave Outcomes #### 
#### Replicates Fig 5.4, and Appendix Table A5.3 Model 1
#######################

vars<-c("gender_leave_wks", "matleave_wks", "paid_parleave_total_wks", "paid_daddy_leave_wks", 
"lag1.quota", "ll.log_gdppc", "ll.flfp", "ll.gov_left1",
"ll.share_partyquota", "ll.fert2", "ll.EUmem", "year", "countryname", "countryname2")
test3<-na.omit(test[,vars])
nrow(test3) ##560 

##install.packages("multiwayvcov")
library(multiwayvcov)
library(lmtest)
library(stargazer) 

fm.5<- lm(gender_leave_wks ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test3)

C1<- clx(fm.5, 1, test3$countryname2)
std.err<-sqrt(diag(C1$vcov))
r.est2<-cbind(estimate=fm.5$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.5$coefficients/std.err))),5), 
       lower=fm.5$coefficients-1.96*std.err, 
       upper=fm.5$coefficients+1.96*std.err)
r.est2


######################
######################
## Composite leave policy durations -- Table A5.3 Models 2-4
######################
######################

fm.1<- lm(matleave_wks ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test3)
C1<- clx(fm.1, 1, test3$countryname2)
std.err<-sqrt(diag(C1$vcov))
r.est1<-cbind(estimate=fm.1$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.1$coefficients/std.err))),5), 
       lower=fm.1$coefficients-1.96*std.err, 
       upper=fm.1$coefficients+1.96*std.err)
r.est1

fm.2<- lm(paid_parleave_total_wks ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test3)
C1<- clx(fm.2, 1, test3$countryname2)
std.err<-sqrt(diag(C1$vcov))
r.est2<-cbind(estimate=fm.2$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.2$coefficients/std.err))),5), 
       lower=fm.2$coefficients-1.96*std.err, 
       upper=fm.2$coefficients+1.96*std.err)
r.est2

fm.3<- lm(paid_daddy_leave_wks ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test3)
C1<- clx(fm.3, 1, test3$countryname2)
std.err<-sqrt(diag(C1$vcov))
r.est3<-cbind(estimate=fm.3$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.3$coefficients/std.err))),5), 
       lower=fm.3$coefficients-1.96*std.err, 
       upper=fm.3$coefficients+1.96*std.err)
r.est3

### Not included: weeks of job protected parental leave 
#fm.3<- lm( parental_protected_wks ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
#+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
# + as.factor(year) 
#+ as.factor(countryname2),
#data=test)
#summary(fm.3)
#vcov_country <- cluster.vcov(fm.3, test$countryname)
#coeftest(fm.3, vcov_country) 

#######################
########################
## Table on public spending, other outcomes ###
## Replicates Fig 5.5 and Table A5.4
########################

fm.5b<- lm(socexp_t_pmp ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test)
summary(fm.5b)
vcov_country <- cluster.vcov(fm.5b, test$countryname)
coeftest(fm.5b, vcov_country) 

fm.6b<- lm(health_pmp ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test)
summary(fm.6b)
vcov_country <- cluster.vcov(fm.6b, test$countryname)
coeftest(fm.6b, vcov_country) ## positive but not sig 

fm.7b<- lm(oldage_pmp ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test)
summary(fm.7b)
vcov_country <- cluster.vcov(fm.7b, test$countryname)
coeftest(fm.7b, vcov_country) 

fm.8b<- lm( edu_lesspreprim ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test)
summary(fm.8b)
vcov_country <- cluster.vcov(fm.8b, test$countryname)
coeftest(fm.8b, vcov_country) 

################# Figure 5.5 

mydata3 = read.csv("for_figure_5_5.csv")
head(mydata3)

tiff("Fig5_5.tiff", width = 10, height = 5, units = 'in', res = 1200)

bp<-ggplot(mydata3, aes(colour=factor(model), x = factor(model), y = q_coef)) +
  geom_point(size = 3)+ geom_hline(yintercept = 0) +
  geom_errorbar(aes(ymax = ub, ymin = lb), size=1, width=.3)  +
 scale_colour_grey(end = 0) + ylab("Coefficient (95% CIs)") + xlab("") +  theme_classic() + theme(legend.position="none") + theme(text = element_text(size=12))


bp2<- bp + scale_x_discrete(labels=c("social" = "Overall Social", "health" = "Health",
                             "old_age" = "Old Age", "education" = "Education"))
bp2
bp3<- bp2 + coord_flip(ylim = c(-2,5)) + theme(legend.position="none") + theme(text = element_text(size=20)) + theme(
axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'))
bp3

dev.off()

################################# PDF 


pdf("Fig5_5.pdf", width = 10, height = 5)

bp<-ggplot(mydata3, aes(colour=factor(model), x = factor(model), y = q_coef)) +
  geom_point(size = 3)+ geom_hline(yintercept = 0) +
  geom_errorbar(aes(ymax = ub, ymin = lb), size=1, width=.3)  +
 scale_colour_grey(end = 0) + ylab("Coefficient (95% CIs)") + xlab("") +  theme_classic() + theme(legend.position="none") + theme(text = element_text(size=12))


bp2<- bp + scale_x_discrete(labels=c("social" = "Overall Social", "health" = "Health",
                             "old_age" = "Old Age", "education" = "Education")) + coord_flip(ylim = c(-2,5)) + theme(legend.position="none") + theme(text = element_text(size=20)) + theme(
axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'))
bp2

dev.off()

##########################
### Figure 5.6 
############################
############################

coun3 = read.csv("mean_center.csv") 


####################################
####################################

tiff("Fig5_6a.tiff", width = 5, height = 5, units = 'in', res = 1200)

ggplot(data=coun3, aes(x=quota_center, y=allowance)) +
  geom_point() +
  geom_smooth(method="lm", colour="black", data=coun3[coun3$quota_center < 0,]) + geom_smooth(method="lm", colour="black", data=coun3[coun3$quota_center > -1,]) + xlab("") + 
ylab("Spending on Family Allowances (% GDP)") + guides(shape=FALSE) + theme(legend.title = element_blank()) +
  ggtitle("") + theme(plot.title = element_text(hjust = 0.5))+ theme_classic()  +
scale_x_continuous(breaks=c(-10,-8, -6, -4, -2, 0, 2, 4), lim = c(-10,4))
dev.off()

####################################################### PDF 

pdf("Fig5_6a.pdf", width = 5, height = 5)

ggplot(data=coun3, aes(x=quota_center, y=allowance)) +
  geom_point() +
  geom_smooth(method="lm", colour="black", data=coun3[coun3$quota_center < 0,]) + geom_smooth(method="lm", colour="black", data=coun3[coun3$quota_center > -1,]) + xlab("") + 
ylab("Spending on Family Allowances (% GDP)") + guides(shape=FALSE) + theme(legend.title = element_blank()) +
  ggtitle("") + theme(plot.title = element_text(hjust = 0.5))+ theme_classic()  +
scale_x_continuous(breaks=c(-10,-8, -6, -4, -2, 0, 2, 4), lim = c(-10,4))
dev.off()

## now for leave  

tiff("Fig5_6b.tiff", width = 5, height = 5, units = 'in', res = 1200)

ggplot(data=coun3, aes(x=quota_center, y=gender_leave_wks)) +
  geom_point() +
  geom_smooth(method="lm",  colour="black", data=coun3[coun3$quota_center < 0,]) + geom_smooth(method="lm", colour="black", data=coun3[coun3$quota_center > -1,]) + xlab("") + 
ylab("Gender Equality Leave Policy (Weeks)") + guides(shape=FALSE) + theme(legend.title = element_blank()) +
  ggtitle("") + theme(plot.title = element_text(hjust = 0.5))+ theme_minimal() +theme_classic() +
scale_x_continuous(breaks=c(-10, -8, -6, -4, -2, 0, 2, 4, 6, 8), lim = c(-10,7))
dev.off()

########################################################## PDF 
pdf("Fig5_6b.pdf", width = 5, height = 5)

ggplot(data=coun3, aes(x=quota_center, y=gender_leave_wks)) +
  geom_point() +
  geom_smooth(method="lm",  colour="black", data=coun3[coun3$quota_center < 0,]) + geom_smooth(method="lm", colour="black", data=coun3[coun3$quota_center > -1,]) + xlab("") + 
ylab("Gender Equality Leave Policy (Weeks)") + guides(shape=FALSE) + theme(legend.title = element_blank()) +
  ggtitle("") + theme(plot.title = element_text(hjust = 0.5))+ theme_minimal() +theme_classic() +
scale_x_continuous(breaks=c(-10, -8, -6, -4, -2, 0, 2, 4, 6, 8), lim = c(-10,7))
dev.off()

#########################
## Table 5.1, calculate change pre-post in quota countries 
#########################

test$allowance[test$countryname=="Belgium" & test$year==2013] - test$allowance[test$countryname=="Belgium" & test$year==1998] 
# -0.236
test$allowance[test$countryname=="Italy" & test$year==1995] - test$allowance[test$countryname=="Italy" & test$year==1993] 
# -0.01
test$allowance[test$countryname=="Portugal" & test$year==2013] - test$allowance[test$countryname=="Portugal" & test$year==2008] 
# -0.089
test$allowance[test$countryname=="Spain" & test$year==2013] - test$allowance[test$countryname=="Spain" & test$year==2007] 
# 0.039
test$allowance[test$countryname=="France" & test$year==2013] - test$allowance[test$countryname=="France" & test$year==2001] 
# 0.096

test$gender_leave_wks[test$countryname=="Belgium" & test$year==2016] - test$gender_leave_wks[test$countryname=="Belgium" & test$year==1998] 
# 10
test$gender_leave_wks[test$countryname=="Italy" & test$year==1995] - test$gender_leave_wks[test$countryname=="Italy" & test$year==1993] 
# 0
test$gender_leave_wks[test$countryname=="Portugal" & test$year==2016] - test$gender_leave_wks[test$countryname=="Portugal" & test$year==2008] 
# 42.4
test$gender_leave_wks[test$countryname=="Spain" & test$year==2016] - test$gender_leave_wks[test$countryname=="Spain" & test$year==2007] 
# 0
test$gender_leave_wks[test$countryname=="France" & test$year==2016] - test$gender_leave_wks[test$countryname=="France" & test$year==2001] 
# 54

test$matleave_wks[test$countryname=="Belgium" & test$year==2016] - test$matleave_wks[test$countryname=="Belgium" & test$year==1998] 
# 0
test$matleave_wks[test$countryname=="Italy" & test$year==1995] - test$matleave_wks[test$countryname=="Italy" & test$year==1993] 
# 0
test$matleave_wks[test$countryname=="Portugal" & test$year==2016] - test$matleave_wks[test$countryname=="Portugal" & test$year==2008] 
# -11.1
test$matleave_wks[test$countryname=="Spain" & test$year==2016] - test$matleave_wks[test$countryname=="Spain" & test$year==2007] 
# 0
test$matleave_wks[test$countryname=="France" & test$year==2016] - test$matleave_wks[test$countryname=="France" & test$year==2001] 
# 0

test$paid_parleave_total_wks[test$countryname=="Belgium" & test$year==2016] - test$paid_parleave_total_wks[test$countryname=="Belgium" & test$year==1998] 
# 4.3
test$paid_parleave_total_wks[test$countryname=="Italy" & test$year==1995] - test$paid_parleave_total_wks[test$countryname=="Italy" & test$year==1993] 
# 0
test$paid_parleave_total_wks[test$countryname=="Portugal" & test$year==2016] - test$paid_parleave_total_wks[test$countryname=="Portugal" & test$year==2008] 
# 13
test$paid_parleave_total_wks[test$countryname=="Spain" & test$year==2016] - test$paid_parleave_total_wks[test$countryname=="Spain" & test$year==2007] 
# 0
test$paid_parleave_total_wks[test$countryname=="France" & test$year==2016] - test$paid_parleave_total_wks[test$countryname=="France" & test$year==2001] 
# 26

test$paid_daddy_leave_wks[test$countryname=="Belgium" & test$year==2016] - test$paid_daddy_leave_wks[test$countryname=="Belgium" & test$year==1998] 
# 5.7
test$paid_daddy_leave_wks[test$countryname=="Italy" & test$year==1995] - test$paid_daddy_leave_wks[test$countryname=="Italy" & test$year==1993] 
# 0
test$paid_daddy_leave_wks[test$countryname=="Portugal" & test$year==2016] - test$paid_daddy_leave_wks[test$countryname=="Portugal" & test$year==2008] 
# 18.3
test$paid_daddy_leave_wks[test$countryname=="Spain" & test$year==2016] - test$paid_daddy_leave_wks[test$countryname=="Spain" & test$year==2007] 
# 0
test$paid_daddy_leave_wks[test$countryname=="France" & test$year==2016] - test$paid_daddy_leave_wks[test$countryname=="France" & test$year==2001] 
# 28

#######################
######### Quota shock -- Table A5.5
#######################

library(multiwayvcov)
library(lmtest)
library(stargazer) 

vars<-c("allowance",  "lag1.quota_shock", "ll.log_gdppc", "ll.flfp", "ll.gov_left1",
"ll.share_partyquota", "ll.fert2", "ll.EUmem", "year", "countryname", "countryname2")
test3<-na.omit(test[,vars])
nrow(test3) 

fm.10<- lm(allowance ~  lag1.quota_shock + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test3)
summary(fm.10)

C1<- clx(fm.10, 1, test3$countryname2)
std.err<-sqrt(diag(C1$vcov))
r.est2<-cbind(estimate=fm.10$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.10$coefficients/std.err))),5), 
       lower=fm.10$coefficients-1.96*std.err, 
       upper=fm.10$coefficients+1.96*std.err)
r.est2

vars<-c("gender_leave_wks",  "lag1.quota_shock", "ll.log_gdppc", "ll.flfp", "ll.gov_left1",
"ll.share_partyquota", "ll.fert2", "ll.EUmem", "year", "countryname", "countryname2")
test4<-na.omit(test[,vars])
nrow(test4)


fm.12<- lm(gender_leave_wks ~  lag1.quota_shock + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + as.factor(year) 
+ as.factor(countryname2),
data=test4)
summary(fm.12)

C1<- clx(fm.12, 1, test4$countryname2)
std.err<-sqrt(diag(C1$vcov))
r.est3<-cbind(estimate=fm.12$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.12$coefficients/std.err))),5), 
       lower=fm.12$coefficients-1.96*std.err, 
       upper=fm.12$coefficients+1.96*std.err)
r.est3


#################### Table A5.6 Models 1 and 2, no controls 


vars<-c("allowance",  "lag1.quota", "ll.log_gdppc", "ll.flfp", "ll.gov_left1",
"ll.share_partyquota", "ll.fert2", "ll.EUmem", "year", "countryname", "countryname2")
test3<-na.omit(test[,vars])
nrow(test3) ##560 

fm.10<- lm(allowance ~  lag1.quota + as.factor(year) 
+ as.factor(countryname2),
data=test3)
summary(fm.10)

C1<- clx(fm.10, 1, test3$countryname2)
std.err<-sqrt(diag(C1$vcov))
r.est2<-cbind(estimate=fm.10$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.10$coefficients/std.err))),5), 
       lower=fm.10$coefficients-1.96*std.err, 
       upper=fm.10$coefficients+1.96*std.err)
r.est2

vars<-c("gender_leave_wks", "matleave_wks", "paid_parleave_total_wks", "paid_daddy_leave_wks", 
"lag1.quota", "ll.log_gdppc", "ll.flfp", "ll.gov_left1",
"ll.share_partyquota", "ll.fert2", "ll.EUmem", "year", "countryname", "countryname2")
test4<-na.omit(test[,vars])
nrow(test4) 

fm.12<- lm(gender_leave_wks ~  lag1.quota  + as.factor(year) 
+ as.factor(countryname2),
data=test4)
summary(fm.12)

C1<- clx(fm.12, 1, test4$countryname2)
std.err<-sqrt(diag(C1$vcov))
r.est3<-cbind(estimate=fm.12$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.12$coefficients/std.err))),5), 
       lower=fm.12$coefficients-1.96*std.err, 
       upper=fm.12$coefficients+1.96*std.err)
r.est3

#################
## Table A5.6 Models 3 and 4, Country specific time trends

vars<-c("allowance",  "lag1.quota", "lag1.gov_left1", "ll.log_gdppc", "ll.flfp", "ll.gov_left1",
"ll.share_partyquota", "ll.fert2", "ll.EUmem", "year", "countryname", "countryname2")
test3<-na.omit(test[,vars])
nrow(test3) ##

fm.10<- lm(allowance ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1 
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + factor(countryname)*year + factor(year),
data=test3)
summary(fm.10)

C1<- clx(fm.10, 1, test3$countryname2)
std.err<-sqrt(diag(C1$vcov))
r.est2<-cbind(estimate=fm.10$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.10$coefficients/std.err))),5), 
       lower=fm.10$coefficients-1.96*std.err, 
       upper=fm.10$coefficients+1.96*std.err)
r.est2

vars<-c("gender_leave_wks", "matleave_wks", "paid_parleave_total_wks", "paid_daddy_leave_wks", 
"lag1.quota", "ll.log_gdppc", "ll.flfp", "ll.gov_left1", "lag1.gov_left1",
"ll.share_partyquota", "ll.fert2", "ll.EUmem", "year", "countryname", "countryname2")
test4<-na.omit(test[,vars])
nrow(test4) ##560 

fm.12<- lm(gender_leave_wks ~  lag1.quota + ll.log_gdppc + ll.flfp + ll.gov_left1
+ ll.share_partyquota  + ll.fert2 + ll.EUmem 
 + factor(countryname)*year + factor(year),
data=test4)
summary(fm.12)

C1<- clx(fm.12, 1, test4$countryname2)
std.err<-sqrt(diag(C1$vcov))
r.est3<-cbind(estimate=fm.12$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.12$coefficients/std.err))),5), 
       lower=fm.12$coefficients-1.96*std.err, 
       upper=fm.12$coefficients+1.96*std.err)
r.est3


###########################
###########################
### Chapter 6 #############
###########################
###########################

load("Chapter6_data.RData")

### subset to one country 

br2<-dat3[ which(dat3$countryname=='Austria'),]
nrow(br2)
## var is pfem_new2, also need partyname
table(br2$partyname)

## change weird partyname using marpor code. it is 42520
br2$partyname[br2$party==42520]<-"Austrian People's Party"
table(br2$partyname)

## Then make partyabbrev & keep 4 parties:  Greens, SPO, OVP, FPO

br3 <- subset(br2, partyname == "The Greens" | partyname == "Austrian Social Democratic Party" | partyname == "Austrian Freedom Party" | partyname == "Austrian People's Party")

br3$partyabbrev<-NA
br3$partyabbrev[br3$partyname=="The Greens"]<-"Greens"
br3$partyabbrev[br3$partyname=="Austrian Social Democratic Party"]<-"SPO"
br3$partyabbrev[br3$partyname=="Austrian Freedom Party"]<-"FPO"
br3$partyabbrev[br3$partyname=="Austrian People's Party"]<-"OVP"

vars<-c("year", "partyabbrev", "pfem_new2")
blah<-br3[,vars]
head(blah)

br2<-na.omit(blah)
nrow(br2)

library(ggplot2)

library(ggplot2)
library(directlabels)

tiff("Fig6_2b.tiff", width = 5, height = 5, units = 'in', res = 1200)

p <- ggplot(br3, aes(year, pfem_new2, group = partyabbrev,
     colour = partyabbrev))  + geom_point(size = 2) + geom_line(size = 0.8)
     theme(legend.position = "none") + 
theme_bw() +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15)) 
p<- p + scale_x_continuous(breaks=seq(1990,2020,5), limits=c(1990,2021)) + scale_y_continuous(limits=c(0,80)) + theme_classic()

p<- p + theme(legend.position = "none") + geom_vline(xintercept = 1999, linetype="dotted", size=1.5) + annotate("text", x = 2010.7, y = 5, label = "<-- Quota Law Implemented in Belgium, 1999")
direct.label(p, list('last.bumpup', cex=0.95, fontface="bold")) + xlab("") + ylab("% Women in Party") + ggtitle("Austria") + 
scale_colour_grey()
dev.off()

############################################################ PDF 

pdf("Fig6_2b.pdf", width = 5, height = 5)

p <- ggplot(br3, aes(year, pfem_new2, group = partyabbrev,
     colour = partyabbrev))  + geom_point(size = 2) + geom_line(size = 0.8)
     theme(legend.position = "none") + 
theme_bw() +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15)) 
p<- p + scale_x_continuous(breaks=seq(1990,2020,5), limits=c(1990,2024)) + scale_y_continuous(limits=c(0,80)) + theme_classic()

p<- p + theme(legend.position = "none") + geom_vline(xintercept = 1999, linetype="dotted", size=1.5) + annotate("text", x = 2012, y = 5, label = "<-- Quota Law Implemented in Belgium, 1999")
direct.label(p, list('last.bumpup', cex=0.95, fontface="bold")) + xlab("") + ylab("% Women in Party") + ggtitle("Austria") + 
scale_colour_grey()
dev.off()


#### Now for Belgium 

br2<-dat3[ which(dat3$countryname=='Belgium'),]
nrow(br2)
table(br2$partyname)

## there is an error, 2014 Green party is 50% women not 0%
 br2$pfem_new2[br2$party==21112 & br2$year==2014]<-50
## note that NVA 2003 is correct, only 1 seat and went to male party leader 

br2$partyabbrev<-NA
br2$partyabbrev[br2$party==21111]<-"Ecolo"
br2$partyabbrev[br2$party==21112]<-"Green"
br2$partyabbrev[br2$party==21321]<-"sp.a"
br2$partyabbrev[br2$party==21322]<-"PS"
br2$partyabbrev[br2$party==21421]<-"Open VLD"
br2$partyabbrev[br2$party==21521]<-"CD&V"
br2$partyabbrev[br2$party==21522]<-"Cdh"
br2$partyabbrev[br2$party==21917]<-"VB"
br2$partyabbrev[br2$party==21914]<-"VB"
br2$partyabbrev[br2$party==21916]<-"NVA"


vars<-c("year", "partyabbrev", "pfem_new2")
blah<-br2[,vars]
head(blah)

br2<-na.omit(blah)
nrow(br2)

tiff("Fig6_2a.tiff", width = 5, height = 5, units = 'in', res = 1200)

p <- ggplot(br2, aes(year, pfem_new2, group = partyabbrev,
     colour = partyabbrev))  + geom_point(size = 2) + geom_line(size = 0.8)
     theme(legend.position = "none") + 
theme_bw() +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15))  
p<- p + scale_x_continuous(breaks=seq(1990,2020,5), limits=c(1990,2022)) + scale_y_continuous(limits=c(0,80))+ theme_classic()

p<- p + theme(legend.position = "none") + geom_vline(xintercept = 1999, linetype="dotted", size=1.5) + annotate("text", x = 2011, y = 5, label = "<-- Quota Law Implemented in Belgium, 1999")
direct.label(p, list('last.bumpup', cex=0.95, fontface="bold")) + xlab("") + ylab("% Women in Party") + ggtitle("Belgium")+ 
scale_colour_grey()

dev.off()

############################PDF

pdf("Fig6_2a.pdf", width = 5, height = 5)

p <- ggplot(br2, aes(year, pfem_new2, group = partyabbrev,
     colour = partyabbrev))  + geom_point(size = 2) + geom_line(size = 0.8)
     theme(legend.position = "none") + 
theme_bw() +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15))  
p<- p + scale_x_continuous(breaks=seq(1990,2020,5), limits=c(1990,2022)) + scale_y_continuous(limits=c(0,80))+ theme_classic()

p<- p + theme(legend.position = "none") + geom_vline(xintercept = 1999, linetype="dotted", size=1.5) + annotate("text", x = 2011, y = 5, label = "<-- Quota Law Implemented in Belgium, 1999")
direct.label(p, list('last.bumpup', cex=0.95, fontface="bold")) + xlab("") + ylab("% Women in Party") + ggtitle("Belgium")+ 
scale_colour_grey()

dev.off()

### Portugal 

look <- read.csv("Portugal_party.csv")
look$party<-as.factor(look$party)

look$include<-1
look$include[look$exclude==1]<-0
table(look$include)

newdata <- look[ which(look$include==1), ]
nrow(look)
nrow(newdata) 

### take out PEV 
newdata <- newdata[ which(newdata$partyabbrev!="PEV"), ]
newdata$pfem_new2<-newdata$share_women


vars<-c("year", "partyabbrev", "pfem_new2")
blah<-newdata[,vars]
head(blah)

br2<-na.omit(blah)
nrow(br2)

tiff("Fig6_4a.tiff", width = 5, height = 5, units = 'in', res = 1200)

p <- ggplot(br2, aes(year, pfem_new2, group = partyabbrev,
     colour = partyabbrev))  + geom_point(size = 2) + geom_line(size = 0.8)
     theme(legend.position = "none") + 
theme_bw() +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15))  + 
    scale_y_continuous("% Women in Party") 
p<- p + scale_x_continuous(breaks=seq(1990,2023,5), limits=c(1990,2025)) + scale_y_continuous(limits=c(0,80)) + theme_classic()

p<- p + theme(legend.position = "none") + geom_vline(xintercept = 2009, linetype="dotted", size=1.5) + annotate("text", x = 2018, y = 11, label = "<-- Quota Law Implemented in 
Portugal, 2009")
direct.label(p, list('last.bumpup', cex=0.95, fontface="bold")) + xlab("") + ylab("% Women in Party") + ggtitle("Portugal")+ 
scale_colour_grey()

dev.off()

############################## PDF 

pdf("Fig6_4a.pdf", width = 5, height = 5)

p <- ggplot(br2, aes(year, pfem_new2, group = partyabbrev,
     colour = partyabbrev))  + geom_point(size = 2) + geom_line(size = 0.8)
     theme(legend.position = "none") + 
theme_bw() +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15))  + 
    scale_y_continuous("% Women in Party") 
p<- p + scale_x_continuous(breaks=seq(1990,2023,5), limits=c(1990,2026)) + scale_y_continuous(limits=c(0,80)) + theme_classic()

p<- p + theme(legend.position = "none") + geom_vline(xintercept = 2009, linetype="dotted", size=1.5) + annotate("text", x = 2018, y = 11, label = "<-- Quota Law Implemented 
in Portugal, 2009")
direct.label(p, list('last.bumpup', cex=0.95, fontface="bold")) + xlab("") + ylab("% Women in Party") + ggtitle("Portugal")+ 
scale_colour_grey()

dev.off()


#### Italy 

br2<-dat3[ which(dat3$countryname=='Italy'),]
nrow(br2)

br2$partyabbrev<-NA
br2$partyabbrev[br2$party==32220]<-"PDS/ DS/ PD"
br2$partyabbrev[br2$party==32440]<-"PDS/ DS/ PD"
br2$partyabbrev[br2$party==32610]<-"FI/ PDL"
br2$partyabbrev[br2$party==32061]<-"FI/ PDL"
br2$partyabbrev[br2$party==32720]<-"LN/ L"
br2$partyabbrev[br2$party==32956]<-"M5S"


vars<-c("year", "partyabbrev", "pfem_new2")
blah<-br2[,vars]
head(blah)

br2<-na.omit(blah)
nrow(br2)

br2

br3 <- subset(br2, year<  2018)

tiff("Fig6_4b.tiff", width = 5, height = 5, units = 'in', res = 1200)

p <- ggplot(br3, aes(year, pfem_new2, group = partyabbrev,
     colour = partyabbrev))  + geom_point(size = 2) + geom_line(size = 0.8)
     theme(legend.position = "none") + 
theme_bw() +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15))  + 
    scale_y_continuous("% Women in Party") 
p<- p + scale_x_continuous(breaks=seq(1990,2023,5), limits=c(1990,2025)) + scale_y_continuous(limits=c(0,80)) + theme_classic()

p<- p + theme(legend.position = "none") + geom_vline(xintercept = 2009, linetype="dotted", size=1.5) + annotate("text", x = 2018, y = 11, label = "<-- Quota Law Implemented in 
Portugal, 2009")
direct.label(p, list('last.bumpup', cex=0.95, fontface="bold")) + xlab("") + ylab("% Women in Party") + ggtitle("Italy")+ 
scale_colour_grey()
dev.off()

############################################ PDF 


pdf("Fig6_4b.pdf", width = 5, height = 5)

p <- ggplot(br3, aes(year, pfem_new2, group = partyabbrev,
     colour = partyabbrev))  + geom_point(size = 2) + geom_line(size = 0.8)
     theme(legend.position = "none") + 
theme_bw() +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15))  + 
    scale_y_continuous("% Women in Party") 
p<- p + scale_x_continuous(breaks=seq(1990,2023,5), limits=c(1990,2025)) + scale_y_continuous(limits=c(0,80)) + theme_classic()

p<- p + theme(legend.position = "none") + geom_vline(xintercept = 2009, linetype="dotted", size=1.5) + annotate("text", x = 2018, y = 11, label = "<-- Quota Law Implemented
in Portugal, 2009")
direct.label(p, list('last.bumpup', cex=0.95, fontface="bold")) + xlab("") + ylab("% Women in Party") + ggtitle("Italy")+ 
scale_colour_grey()
dev.off()

############## Figures 61., 6.3 

load("merged_oecd_cpds.RData")
head(test)


library(ggplot2)
library(foreign)
library(plyr)
library(dplyr)
library(xtable)
library(TeachingDemos)
library(reporttools)
library(gridExtra)
library(vcd)


vars<-c("countryname", "year", "womenpar","quota")
br<-test[vars]
br$Year<-br$year

### Start with Belgium and Austria 

br2<-br[ which(br$countryname=='Austria' &
br$Year > 1989 | br$countryname == 'Belgium'&
br$Year > 1989 ),]
nrow(br2)

library(ggplot2)
library(directlabels)

tiff("Fig6_1.tiff", width = 5, height = 5, units = 'in', res = 1200)

p <- ggplot(br2, aes(Year, womenpar, group = countryname,
     colour = countryname))  + geom_point(size = 1.5) + geom_line(size = 0.8) +
     theme(legend.position = "none") + 
theme_classic() +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15))  + 
    scale_y_continuous("% Women in Parliament") +  
scale_x_continuous("",breaks=seq(1990,2020,5), limits=c(1990,2021))

p<- p + theme(legend.position = "none") + geom_vline(xintercept = 1999, linetype="dotted", size=1.5) + annotate("text", x = 2010, y = 10, label = "<-- Quota Law Implemented in Belgium")
direct.label(p, list('last.bumpup', cex=0.95, fontface="bold")) + 
scale_colour_grey()
dev.off()

#################### PDF 

pdf("Fig6_1.pdf", width = 5, height = 5)

p <- ggplot(br2, aes(Year, womenpar, group = countryname,
     colour = countryname))  + geom_point(size = 1.5) + geom_line(size = 0.8) +
     theme(legend.position = "none") + 
theme_classic() +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15))  + 
    scale_y_continuous("% Women in Parliament") +  
scale_x_continuous("",breaks=seq(1990,2020,5), limits=c(1990,2021))

p<- p + theme(legend.position = "none") + geom_vline(xintercept = 1999, linetype="dotted", size=1.5) + annotate("text", x = 2010, y = 10, label = "<-- Quota Law Implemented in Belgium")
direct.label(p, list('last.bumpup', cex=0.95, fontface="bold")) + 
scale_colour_grey()
dev.off()


### Now Portugal and Italy 

br3<-br[ which(br$countryname=='Portugal' &
br$Year > 1989 | br$countryname == 'Italy'&
br$Year > 1989 ),]
nrow(br3)

library(ggplot2)
library(directlabels)

tiff("Fig6_3.tiff", width = 5, height = 5, units = 'in', res = 1200)

p <- ggplot(br3, aes(Year, womenpar, group = countryname,
     colour = countryname))  + geom_point(size = 1.5) + geom_line(size = 0.8) +
     theme(legend.position = "none") + 
theme_classic() +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15))  + 
    scale_y_continuous("% Women in Parliament") +  
scale_x_continuous("",breaks=seq(1990,2020,5), limits=c(1990,2020))

p<- p + theme(legend.position = "none") + geom_vline(xintercept = 2009, linetype="dotted", size=1.5) + annotate("text", x = 1999, y = 6, label = "Quota Law Implemented in Portugal-->")
direct.label(p, list('last.bumpup', cex=0.95, fontface="bold")) + 
scale_colour_grey()
dev.off()

##################### PDF 
pdf("Fig6_3.pdf", width = 5, height = 5)

p <- ggplot(br3, aes(Year, womenpar, group = countryname,
     colour = countryname))  + geom_point(size = 1.5) + geom_line(size = 0.8) +
     theme(legend.position = "none") + 
theme_classic() +
theme(text=element_text(color="black")) +
    theme(axis.title=element_text(size=15))  + 
    scale_y_continuous("% Women in Parliament") +  
scale_x_continuous("",breaks=seq(1990,2020,5), limits=c(1990,2020))

p<- p + theme(legend.position = "none") + geom_vline(xintercept = 2009, linetype="dotted", size=1.5) + annotate("text", x = 1999, y = 6, label = "Quota Law Implemented in Portugal-->")
direct.label(p, list('last.bumpup', cex=0.95, fontface="bold")) + 
scale_colour_grey()
dev.off()

###########################
###########################
### Chapter 7 #############
###########################
###########################


library(foreign)
library(ggplot2) 
library(collapse)

### Open ISSP data for 2012

library(haven)
dat2<- read_dta("ZA5900_2.dta")
head(dat2)
nrow(dat2)

## weight

dat2$weight<-dat2$WEIGHT

## gender  
table(dat2$SEX)
dat2$gender<-NA
dat2$gender[dat2$SEX==1]<-0
dat2$gender[dat2$SEX==2]<-1
table(dat2$gender)

## country list 
table(dat2$C_ALPHAN)

### preschool child will suffer (disagree)

table(dat2$V6)
## throw out 8 and 9 
## 4 and 5 are disagree

dat2$matemp<-NA
dat2$matemp[dat2$V6==1]<-0
dat2$matemp[dat2$V6==2]<-0
dat2$matemp[dat2$V6==3]<-0
dat2$matemp[dat2$V6==4]<-1
dat2$matemp[dat2$V6==5]<-1
table(dat2$matemp)

## Spain only 3 and 4 are disagree, 1 and 2 code 0
table(dat2$ES_V6)
dat2$matemp[dat2$ES_V6==1]<-0
dat2$matemp[dat2$ES_V6==2]<-0
dat2$matemp[dat2$ES_V6==3]<-1
dat2$matemp[dat2$ES_V6==4]<-1

table(dat2$matemp)

### calculate gender gaps by country using weights 
### for each country (need to change code below)

table(dat2$C_ALPHAN)
reg<-dat2[which(dat2$C_ALPHAN=="US"), ]
nrow(reg) ## check that nrow matches country

test<- collap(reg, matemp ~ gender, fmean, w = ~ weight, keep.w = FALSE)
test

################
########### Figure 7.1
################

dat2 = read.csv("mat_emp_world_weights.csv")
head(dat2)

dat2$fl<-dat2$flevel*100
dat2$ml<-dat2$mlevel*100
dat2$gap2<-dat2$fl - dat2$ml
head(dat2)
dat2$flevel<-dat2$fl

vars<-c("countryname", "var6", "gap2", "flevel")
dat<-na.omit(dat2[vars])

tiff("Fig7_1.tiff", width = 10, height = 10, units = 'in', res = 1200)

plot1<-ggplot(dat, aes(reorder(var6, gap2), gap2, label=gap2)) + 
   geom_bar(fill=c("grey"), stat="identity") + coord_flip() +
   ylab("Average Gender Gaps (% Women -- % Men)") + xlab("")  +
  ggtitle("Country (% Women Disagree)") +
  theme(plot.title = element_text(hjust = -.35))  

p1<- plot1 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
    panel.background = element_blank(), axis.line = element_line(colour = "black"), axis.text=element_text(size=14))

FINAL1<- p1 +theme(axis.text.y = element_text(colour="black"),
                         axis.title.x=element_blank(),
          axis.title.y=element_blank() ,axis.ticks=element_blank())

FINAL2<- FINAL1  + 
     theme(plot.title = element_text(lineheight=.8, face="bold", size=16))
FINAL2
dev.off()

########################### PDF 

pdf("Fig7_1.pdf", width = 10, height = 10)

plot1<-ggplot(dat, aes(reorder(var6, gap2), gap2, label=gap2)) + 
   geom_bar(fill=c("grey"), stat="identity") + coord_flip() +
   ylab("Average Gender Gaps (% Women -- % Men)") + xlab("")  +
  ggtitle("Country (% Women Disagree)") +
  theme(plot.title = element_text(hjust = -.35))  

p1<- plot1 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
    panel.background = element_blank(), axis.line = element_line(colour = "black"), axis.text=element_text(size=14))

FINAL1<- p1 +theme(axis.text.y = element_text(colour="black"),
                         axis.title.x=element_blank(),
          axis.title.y=element_blank() ,axis.ticks=element_blank())

FINAL2<- FINAL1  + 
     theme(plot.title = element_text(lineheight=.8, face="bold", size=16))
FINAL2
dev.off()

